/aosp_15_r20/external/mesa3d/src/gallium/drivers/vc4/

vc4_draw.c
    76  vc4_start_draw(struct vc4_context *vc4)  in vc4_start_draw() argument
    78  struct vc4_job *job = vc4->job;  in vc4_start_draw()
   109  job->draw_width = vc4->framebuffer.width;  in vc4_start_draw()
   110  job->draw_height = vc4->framebuffer.height;  in vc4_start_draw()
   117  struct vc4_context *vc4 = vc4_context(pctx);  in vc4_predraw_check_textures() local
   128  vc4_flush_jobs_writing_resource(vc4, view->texture);  in vc4_predraw_check_textures()
   133  vc4_emit_gl_shader_state(struct vc4_context *vc4,  in vc4_emit_gl_shader_state() argument
   138  struct vc4_job *job = vc4->job;  in vc4_emit_gl_shader_state()
   140  struct vc4_vertex_stateobj *vtx = vc4->vtx;  in vc4_emit_gl_shader_state()
   142  struct vc4_vertexbuf_stateobj *vertexbuf = &vc4->vertexbuf;  in vc4_emit_gl_shader_state()
   [all …]

vc4_context.c
    42  struct vc4_context *vc4 = vc4_context(pctx);  in vc4_flush() local
    44  hash_table_foreach(vc4->jobs, entry) {  in vc4_flush()
    46  vc4_job_submit(vc4, job);  in vc4_flush()
    54  struct vc4_context *vc4 = vc4_context(pctx);  in vc4_pipe_flush() local
    64  drmSyncobjExportSyncFile(vc4->fd, vc4->job_syncobj,  in vc4_pipe_flush()
    68  struct vc4_fence *f = vc4_fence_create(vc4->screen,  in vc4_pipe_flush()
    69  vc4->last_emit_seqno,  in vc4_pipe_flush()
    89  struct vc4_context *vc4 = vc4_context(pctx);  in vc4_invalidate_resource() local
    94  struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,  in vc4_invalidate_resource()
   107  struct vc4_context *vc4 = vc4_context(pctx);  in vc4_context_destroy() local
   [all …]

vc4_job.c
    26  * Functions for submitting VC4 render jobs to the kernel.
    35  vc4_job_free(struct vc4_context *vc4, struct vc4_job *job)  in vc4_job_free() argument
    42  _mesa_hash_table_remove_key(vc4->jobs, &job->key);  in vc4_job_free()
    45  _mesa_hash_table_remove_key(vc4->write_jobs,  in vc4_job_free()
    50  _mesa_hash_table_remove_key(vc4->write_jobs,  in vc4_job_free()
    55  _mesa_hash_table_remove_key(vc4->write_jobs,  in vc4_job_free()
    60  _mesa_hash_table_remove_key(vc4->write_jobs,  in vc4_job_free()
    68  if (vc4->job == job)  in vc4_job_free()
    69  vc4->job = NULL;  in vc4_job_free()
    75  vc4_job_create(struct vc4_context *vc4)  in vc4_job_create() argument
   [all …]

vc4_blit.c
    55  struct vc4_context *vc4 = vc4_context(pctx);  in vc4_tile_blit() local
   149  vc4_flush_jobs_reading_resource(vc4, info->src.resource);  in vc4_tile_blit()
   153  job = vc4_get_job(vc4, dst_surf, NULL);  in vc4_tile_blit()
   156  job = vc4_get_job(vc4, NULL, dst_surf);  in vc4_tile_blit()
   187  vc4_job_submit(vc4, job);  in vc4_tile_blit()
   194  vc4_blitter_save(struct vc4_context *vc4)  in vc4_blitter_save() argument
   196  util_blitter_save_fragment_constant_buffer_slot(vc4->blitter,  in vc4_blitter_save()
   197  vc4->constbuf[PIPE_SHADER_FRAGMENT].cb);  in vc4_blitter_save()
   198  util_blitter_save_vertex_buffers(vc4->blitter, vc4->vertexbuf.vb,  in vc4_blitter_save()
   199  vc4->vertexbuf.count);  in vc4_blitter_save()
   [all …]

vc4_state.c
    54  struct vc4_context *vc4 = vc4_context(pctx);  in vc4_set_blend_color() local
    55  vc4->blend_color.f = *blend_color;  in vc4_set_blend_color()
    57  vc4->blend_color.ub[i] = float_to_ubyte(blend_color->color[i]);  in vc4_set_blend_color()
    58  vc4->dirty |= VC4_DIRTY_BLEND_COLOR;  in vc4_set_blend_color()
    65  struct vc4_context *vc4 = vc4_context(pctx);  in vc4_set_stencil_ref() local
    66  vc4->stencil_ref = stencil_ref;  in vc4_set_stencil_ref()
    67  vc4->dirty |= VC4_DIRTY_STENCIL_REF;  in vc4_set_stencil_ref()
    74  struct vc4_context *vc4 = vc4_context(pctx);  in vc4_set_clip_state() local
    75  vc4->clip = *clip;  in vc4_set_clip_state()
    76  vc4->dirty |= VC4_DIRTY_CLIP;  in vc4_set_clip_state()
   [all …]

vc4_emit.c
    29  struct vc4_context *vc4 = vc4_context(pctx);  in vc4_emit_state() local
    30  struct vc4_job *job = vc4->job;  in vc4_emit_state()
    32  if (vc4->dirty & (VC4_DIRTY_SCISSOR | VC4_DIRTY_VIEWPORT |  in vc4_emit_state()
    34  float *vpscale = vc4->viewport.scale;  in vc4_emit_state()
    35  float *vptranslate = vc4->viewport.translate;  in vc4_emit_state()
    50  if (!vc4->rasterizer->base.scissor) {  in vc4_emit_state()
    56  minx = MAX2(vp_minx, vc4->scissor.minx);  in vc4_emit_state()
    57  miny = MAX2(vp_miny, vc4->scissor.miny);  in vc4_emit_state()
    58  maxx = MAX2(MIN2(vp_maxx, vc4->scissor.maxx), minx);  in vc4_emit_state()
    59  maxy = MAX2(MIN2(vp_maxy, vc4->scissor.maxy), miny);  in vc4_emit_state()
   [all …]
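
The vc4_emit.c hits above show how the driver intersects the viewport-derived bounds with the scissor rectangle before emitting its clip-window state. A minimal standalone sketch of that clamp follows; the vp_*/sc_* parameters are hypothetical stand-ins for the real vc4->viewport and vc4->scissor fields:

#include <stdint.h>

#define MIN2(a, b) ((a) < (b) ? (a) : (b))
#define MAX2(a, b) ((a) > (b) ? (a) : (b))

/* Intersect a viewport-derived bounding box with the scissor rectangle.
 * Hypothetical helper for illustration only; the real code reads these
 * values out of the context state. */
static void
clip_window(uint32_t vp_minx, uint32_t vp_miny, uint32_t vp_maxx, uint32_t vp_maxy,
            uint32_t sc_minx, uint32_t sc_miny, uint32_t sc_maxx, uint32_t sc_maxy,
            uint32_t *minx, uint32_t *miny, uint32_t *maxx, uint32_t *maxy)
{
        *minx = MAX2(vp_minx, sc_minx);
        *miny = MAX2(vp_miny, sc_miny);
        /* Clamp the upper bound to the lower bound so a scissor that lies
         * entirely outside the viewport yields an empty (not negative) box. */
        *maxx = MAX2(MIN2(vp_maxx, sc_maxx), *minx);
        *maxy = MAX2(MIN2(vp_maxy, sc_maxy), *miny);
}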

vc4_register_allocate.c
   109  vc4_alloc_reg_set(struct vc4_context *vc4)  in vc4_alloc_reg_set() argument
   115  if (vc4->regs)  in vc4_alloc_reg_set()
   118  vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs), false);  in vc4_alloc_reg_set()
   125  vc4->reg_class_any[i] = ra_alloc_contig_reg_class(vc4->regs, 1);  in vc4_alloc_reg_set()
   126  vc4->reg_class_a_or_b[i] = ra_alloc_contig_reg_class(vc4->regs, 1);  in vc4_alloc_reg_set()
   127  vc4->reg_class_a_or_b_or_acc[i] = ra_alloc_contig_reg_class(vc4->regs, 1);  in vc4_alloc_reg_set()
   128  vc4->reg_class_r4_or_a[i] = ra_alloc_contig_reg_class(vc4->regs, 1);  in vc4_alloc_reg_set()
   129  vc4->reg_class_a[i] = ra_alloc_contig_reg_class(vc4->regs, 1);  in vc4_alloc_reg_set()
   131  vc4->reg_class_r0_r3 = ra_alloc_contig_reg_class(vc4->regs, 1);  in vc4_alloc_reg_set()
   135  ra_class_add_reg(vc4->reg_class_r0_r3, i);  in vc4_alloc_reg_set()
   [all …]

vc4_program.c
    48  vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage,
  1686  "vc4 doesn't support indirect inputs");  in ntq_emit_load_input()
  1785  "vc4 doesn't support indirect outputs");  in ntq_emit_intrinsic()
  1886  if (!c->vc4->screen->has_control_flow) {  in ntq_emit_if()
  2042  if (!c->vc4->screen->has_control_flow) {  in ntq_emit_loop()
  2210  vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,  in vc4_shader_ntq() argument
  2215  c->vc4 = vc4;  in vc4_shader_ntq()
  2393  vc4_generate_code(vc4, c);  in vc4_shader_ntq()
  2451  vc4_shader_precompile(struct vc4_context *vc4,  in vc4_shader_precompile() argument
  2469  vc4_get_compiled_shader(vc4, QSTAGE_FRAG, &key.base);  in vc4_shader_precompile()
   [all …]

vc4_context.h
   137  * VC4_DIRTY_* flags that, when set in vc4->dirty, mean that the
   190  /* Hash table key for vc4->jobs */
   425  if (unlikely(vc4->base.debug.debug_message)) \
   426  util_debug_message(&vc4->base.debug, PERF_INFO, __VA_ARGS__); \
   476  void vc4_write_uniforms(struct vc4_context *vc4,
   482  int vc4_job_init(struct vc4_context *vc4);
   483  int vc4_fence_context_init(struct vc4_context *vc4);
   484  struct vc4_job *vc4_get_job(struct vc4_context *vc4,
   487  struct vc4_job *vc4_get_job_for_fbo(struct vc4_context *vc4);
   489  void vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job);
   [all …]

vc4_fence.c
   111  struct vc4_context *vc4 = vc4_context(pctx);  in vc4_fence_create_fd() local
   115  *fence = vc4_fence_create(vc4->screen, vc4->last_emit_seqno,  in vc4_fence_create_fd()
   123  struct vc4_context *vc4 = vc4_context(pctx);  in vc4_fence_server_sync() local
   127  sync_accumulate("vc4", &vc4->in_fence_fd, fence->fd);  in vc4_fence_server_sync()
   139  vc4_fence_context_init(struct vc4_context *vc4)  in vc4_fence_context_init() argument
   141  vc4->base.create_fence_fd = vc4_fence_create_fd;  in vc4_fence_context_init()
   142  vc4->base.fence_server_sync = vc4_fence_server_sync;  in vc4_fence_context_init()
   143  vc4->in_fence_fd = -1;  in vc4_fence_context_init()
   148  if (vc4->screen->has_syncobj) {  in vc4_fence_context_init()
   149  return drmSyncobjCreate(vc4->fd, DRM_SYNCOBJ_CREATE_SIGNALED,  in vc4_fence_context_init()
   [all …]
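
Together with the drmSyncobjExportSyncFile() call in vc4_context.c above, these fence hits show the driver creating a signaled DRM syncobj at context-init time and exporting it as a sync_file at flush time. A minimal sketch of that libdrm usage, assuming drm_fd is an already-open DRM file descriptor and leaving error handling to the raw return codes:

#include <stdint.h>
#include <xf86drm.h>

/* Create a syncobj that starts out signaled, as vc4_fence_context_init()
 * does, so that waiting on it before any job has been submitted succeeds
 * immediately.  Sketch only. */
static int
create_job_syncobj(int drm_fd, uint32_t *syncobj)
{
        return drmSyncobjCreate(drm_fd, DRM_SYNCOBJ_CREATE_SIGNALED, syncobj);
}

/* Export the syncobj's current state as a sync_file fd, roughly what the
 * flush path does when the caller asks for a shareable fence. */
static int
export_finished_fence(int drm_fd, uint32_t syncobj, int *sync_file_fd)
{
        return drmSyncobjExportSyncFile(drm_fd, syncobj, sync_file_fd);
}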

vc4_uniforms.c
   191  vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,  in vc4_write_uniforms() argument
   196  struct vc4_job *job = vc4->job;  in vc4_write_uniforms()
   219  cl_aligned_f(&uniforms, vc4->viewport.scale[0] * 16.0f);  in vc4_write_uniforms()
   222  cl_aligned_f(&uniforms, vc4->viewport.scale[1] * 16.0f);  in vc4_write_uniforms()
   226  cl_aligned_f(&uniforms, vc4->viewport.translate[2]);  in vc4_write_uniforms()
   229  cl_aligned_f(&uniforms, vc4->viewport.scale[2]);  in vc4_write_uniforms()
   234  vc4->clip.ucp[data / 4][data % 4]);  in vc4_write_uniforms()
   260  u_upload_data(vc4->uploader, 0,  in vc4_write_uniforms()
   306  CLAMP(vc4->blend_color.f.color[uinfo->contents[i] -  in vc4_write_uniforms()
   313  vc4_get_format_swizzle(vc4->framebuffer.cbufs[0]->format);  in vc4_write_uniforms()
   [all …]

/aosp_15_r20/external/mesa3d/docs/drivers/

vc4.rst
     1  VC4  title
     4  Mesa's VC4 graphics driver supports multiple implementations of
    11  This Mesa driver talks directly to the `VC4
    12  <https://www.kernel.org/doc/html/latest/gpu/vc4.html>`__ kernel DRM
    21  The VC4 driver is a nearly conformant GLES2 driver, and the hardware
    32  GLES2.0, and VC4, don't have ``GL_UNSIGNED_INT`` index buffers. To support
    33  them in VC4, we create a shadow copy of your index buffer with the
    46  The VC4 hardware has no support for occlusion queries. GL 2.0
    56  VC4 doesn't support reducing triangles/quads/polygons to lines and
    67  VC4 rendering bugs should go to Mesa's GitLab `issues
   [all …]
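
The vc4.rst excerpt explains that the hardware, like base GLES 2.0, has no GL_UNSIGNED_INT index buffers, so the driver keeps a 16-bit shadow copy of any 32-bit index buffer. A minimal sketch of that narrowing step, written as a hypothetical helper rather than the driver's actual upload path, and assuming every index value already fits in 16 bits:

#include <stddef.h>
#include <stdint.h>

/* Narrow a 32-bit index buffer into the 16-bit shadow copy the hardware can
 * consume.  Hypothetical helper for illustration; the real driver performs
 * this when it uploads the shadow resource. */
static void
shadow_index_buffer(uint16_t *dst, const uint32_t *src, size_t count)
{
        for (size_t i = 0; i < count; i++)
                dst[i] = (uint16_t)src[i];
}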

/aosp_15_r20/external/XNNPACK/src/f32-raddstoreexpminusmax/gen/

avx512f-rr1-p5-scalef-x192.c
    32  const __m512 vc4 = _mm512_set1_ps(params->avx512_rr1_p5.c4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192() local
    93  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
    94  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
    95  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
    96  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
    97  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
    98  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
    99  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
   100  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
   101  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192()
   [all …]
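
In these XNNPACK kernels, vc4 is not the GPU driver but the fourth coefficient of a degree-5 polynomial that approximates exp() on a reduced argument vt: each vp accumulator starts as fma(c5, t, c4) and is then folded through the lower-order coefficients with further FMAs. The following single-vector sketch shows that generic Horner evaluation; the coefficient naming c0..c5 is my own, the real kernels unroll the step across many vt0..vtN registers, and building it requires AVX-512F:

#include <immintrin.h>

/* Horner evaluation of p(t) = c0 + c1*t + c2*t^2 + c3*t^3 + c4*t^4 + c5*t^5,
 * the pattern visible in the matched lines above.  Sketch only: coefficients
 * are assumed to be broadcast into __m512 registers by the caller. */
static __m512
eval_p5(__m512 vt, __m512 vc0, __m512 vc1, __m512 vc2,
        __m512 vc3, __m512 vc4, __m512 vc5)
{
    __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);   /* c5*t + c4              */
    vp = _mm512_fmadd_ps(vp, vt, vc3);           /* (c5*t + c4)*t + c3     */
    vp = _mm512_fmadd_ps(vp, vt, vc2);
    vp = _mm512_fmadd_ps(vp, vt, vc1);
    vp = _mm512_fmadd_ps(vp, vt, vc0);
    return vp;
}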

avx512f-rr1-p5-scalef-x192-acc3.c
    32  const __m512 vc4 = _mm512_set1_ps(params->avx512_rr1_p5.c4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3() local
    95  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
    96  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
    97  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
    98  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
    99  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
   100  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
   101  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
   102  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
   103  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc3()
   [all …]

avx512f-rr1-p5-scalef-x192-acc6.c
    32  const __m512 vc4 = _mm512_set1_ps(params->avx512_rr1_p5.c4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6() local
    98  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
    99  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
   100  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
   101  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
   102  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
   103  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
   104  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
   105  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
   106  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc6()
   [all …]

avx512f-rr1-p5-scalef-x192-acc2.c
    32  const __m512 vc4 = _mm512_set1_ps(params->avx512_rr1_p5.c4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2() local
    94  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
    95  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
    96  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
    97  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
    98  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
    99  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
   100  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
   101  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
   102  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx512f_rr1_p5_scalef_x192_acc2()
   [all …]

avx2-rr1-p5-x96-acc6.c
    32  const __m256 vc4 = _mm256_load_ps(params->avx2_rr1_p5.c4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6() local
   124  __m256 vp0 = _mm256_fmadd_ps(vc5, vt0, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6()
   125  __m256 vp1 = _mm256_fmadd_ps(vc5, vt1, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6()
   126  __m256 vp2 = _mm256_fmadd_ps(vc5, vt2, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6()
   127  __m256 vp3 = _mm256_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6()
   128  __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6()
   129  __m256 vp5 = _mm256_fmadd_ps(vc5, vt5, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6()
   130  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6()
   131  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6()
   132  __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc6()
   [all …]

avx2-rr1-p5-x96-acc3.c
    32  const __m256 vc4 = _mm256_load_ps(params->avx2_rr1_p5.c4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3() local
   121  __m256 vp0 = _mm256_fmadd_ps(vc5, vt0, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3()
   122  __m256 vp1 = _mm256_fmadd_ps(vc5, vt1, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3()
   123  __m256 vp2 = _mm256_fmadd_ps(vc5, vt2, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3()
   124  __m256 vp3 = _mm256_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3()
   125  __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3()
   126  __m256 vp5 = _mm256_fmadd_ps(vc5, vt5, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3()
   127  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3()
   128  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3()
   129  __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc3()
   [all …]

avx2-rr1-p5-x96-acc2.c
    32  const __m256 vc4 = _mm256_load_ps(params->avx2_rr1_p5.c4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2() local
   120  __m256 vp0 = _mm256_fmadd_ps(vc5, vt0, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
   121  __m256 vp1 = _mm256_fmadd_ps(vc5, vt1, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
   122  __m256 vp2 = _mm256_fmadd_ps(vc5, vt2, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
   123  __m256 vp3 = _mm256_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
   124  __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
   125  __m256 vp5 = _mm256_fmadd_ps(vc5, vt5, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
   126  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
   127  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
   128  __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96_acc2()
   [all …]

avx2-rr1-p5-x96.c
    32  const __m256 vc4 = _mm256_load_ps(params->avx2_rr1_p5.c4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96() local
   119  __m256 vp0 = _mm256_fmadd_ps(vc5, vt0, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96()
   120  __m256 vp1 = _mm256_fmadd_ps(vc5, vt1, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96()
   121  __m256 vp2 = _mm256_fmadd_ps(vc5, vt2, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96()
   122  __m256 vp3 = _mm256_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96()
   123  __m256 vp4 = _mm256_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96()
   124  __m256 vp5 = _mm256_fmadd_ps(vc5, vt5, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96()
   125  __m256 vp6 = _mm256_fmadd_ps(vc5, vt6, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96()
   126  __m256 vp7 = _mm256_fmadd_ps(vc5, vt7, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96()
   127  __m256 vp8 = _mm256_fmadd_ps(vc5, vt8, vc4);  in xnn_f32_raddstoreexpminusmax_ukernel__avx2_rr1_p5_x96()
   [all …]

/aosp_15_r20/external/XNNPACK/src/f32-raddexpminusmax/gen/

avx512f-p5-scalef-x192.c
    34  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192() local
   113  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
   114  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
   115  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
   116  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
   117  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
   118  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
   119  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
   120  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
   121  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192()
   [all …]

avx512f-p5-scalef-x192-acc2.c
    34  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2() local
   114  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
   115  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
   116  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
   117  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
   118  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
   119  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
   120  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
   121  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
   122  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc2()
   [all …]

avx512f-p5-scalef-x192-acc6.c
    34  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6() local
   118  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
   119  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
   120  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
   121  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
   122  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
   123  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
   124  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
   125  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
   126  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);  in xnn_f32_raddexpminusmax_ukernel__avx512f_p5_scalef_x192_acc6()
   [all …]

/aosp_15_r20/external/XNNPACK/src/f32-vscaleextexp/gen/

avx512f-p5-scalef-x192.c
    36  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192() local
   101  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
   102  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
   103  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
   104  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
   105  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
   106  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
   107  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
   108  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
   109  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);  in xnn_f32_vscaleextexp_ukernel__avx512f_p5_scalef_x192()
   [all …]

/aosp_15_r20/external/XNNPACK/src/f32-vscaleexpminusmax/gen/

avx512f-p5-scalef-x192.c
    35  const __m512 vc4 = _mm512_set1_ps(0x1.573A1Ap-5f);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192() local
   114  __m512 vp0 = _mm512_fmadd_ps(vc5, vt0, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
   115  __m512 vp1 = _mm512_fmadd_ps(vc5, vt1, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
   116  __m512 vp2 = _mm512_fmadd_ps(vc5, vt2, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
   117  __m512 vp3 = _mm512_fmadd_ps(vc5, vt3, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
   118  __m512 vp4 = _mm512_fmadd_ps(vc5, vt4, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
   119  __m512 vp5 = _mm512_fmadd_ps(vc5, vt5, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
   120  __m512 vp6 = _mm512_fmadd_ps(vc5, vt6, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
   121  __m512 vp7 = _mm512_fmadd_ps(vc5, vt7, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
   122  __m512 vp8 = _mm512_fmadd_ps(vc5, vt8, vc4);  in xnn_f32_vscaleexpminusmax_ukernel__avx512f_p5_scalef_x192()
   [all …]
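
The "scalef" in these kernel names refers to _mm512_scalef_ps, which rebuilds exp(x) as 2^n * p(t) from the rounded exponent n and the polynomial value, avoiding a manual exponent-bias trick. The following rough single-vector sketch shows that round/reduce/evaluate/scale pattern under the usual constants (log2(e) and -ln(2)); it ignores the max-subtraction, accumulation, and output scaling the real kernels also perform, and the c0..c5 coefficient naming is again my own:

#include <immintrin.h>

/* exp(x) ~= 2^n * p(t), with n = round(x * log2(e)) and t = x - n*ln(2).
 * Sketch of the reduction/reconstruction pattern only; requires AVX-512F. */
static __m512
exp_p5_scalef(__m512 vx, __m512 vc0, __m512 vc1, __m512 vc2,
              __m512 vc3, __m512 vc4, __m512 vc5)
{
    const __m512 vlog2e     = _mm512_set1_ps(0x1.715476p+0f);  /*  log2(e) */
    const __m512 vminus_ln2 = _mm512_set1_ps(-0x1.62E43p-1f);  /* -ln(2)   */

    const __m512 vn = _mm512_roundscale_ps(_mm512_mul_ps(vx, vlog2e), 0);
    const __m512 vt = _mm512_fmadd_ps(vn, vminus_ln2, vx);

    __m512 vp = _mm512_fmadd_ps(vc5, vt, vc4);
    vp = _mm512_fmadd_ps(vp, vt, vc3);
    vp = _mm512_fmadd_ps(vp, vt, vc2);
    vp = _mm512_fmadd_ps(vp, vt, vc1);
    vp = _mm512_fmadd_ps(vp, vt, vc0);

    return _mm512_scalef_ps(vp, vn);  /* multiply by 2^n */
}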