Index _ | A | B | C | E | F | G | L | M | N | P | R | S | T | V | W _ __init__() (flashinfer.cascade.BatchDecodeWithSharedPrefixPagedKVCacheWrapper method) (flashinfer.cascade.BatchPrefillWithSharedPrefixPagedKVCacheWrapper method) (flashinfer.cascade.MultiLevelCascadeAttentionWrapper method) (flashinfer.decode.BatchDecodeWithPagedKVCacheWrapper method) (flashinfer.decode.CUDAGraphBatchDecodeWithPagedKVCacheWrapper method) (flashinfer.fp4_quantization.SfLayout method) (flashinfer.fused_moe.RoutingMethodType method) (flashinfer.fused_moe.WeightLayout method) (flashinfer.gemm.SegmentGEMMWrapper method) (flashinfer.logits_processor.LogitsPipe method) (flashinfer.logits_processor.LogitsProcessor method) (flashinfer.logits_processor.MinP method) (flashinfer.logits_processor.Sample method) (flashinfer.logits_processor.Softmax method) (flashinfer.logits_processor.TaggedTensor method) (flashinfer.logits_processor.Temperature method) (flashinfer.logits_processor.TensorType method) (flashinfer.logits_processor.TopK method) (flashinfer.logits_processor.TopP method) (flashinfer.mla.BatchMLAPagedAttentionWrapper method) (flashinfer.prefill.BatchPrefillWithPagedKVCacheWrapper method) (flashinfer.prefill.BatchPrefillWithRaggedKVCacheWrapper method) (flashinfer.sparse.BlockSparseAttentionWrapper method) (flashinfer.sparse.VariableBlockSparseAttentionWrapper method) A append_paged_kv_cache() (in module flashinfer.page) append_paged_mla_kv_cache() (in module flashinfer.page) apply_llama31_rope() (in module flashinfer.rope) apply_llama31_rope_inplace() (in module flashinfer.rope) apply_llama31_rope_pos_ids() (in module flashinfer.rope) apply_llama31_rope_pos_ids_inplace() (in module flashinfer.rope) apply_rope() (in module flashinfer.rope) apply_rope_inplace() (in module flashinfer.rope) apply_rope_pos_ids() (in module flashinfer.rope) apply_rope_pos_ids_inplace() (in module flashinfer.rope) apply_rope_with_cos_sin_cache() (in module flashinfer.rope) apply_rope_with_cos_sin_cache_inplace() (in module flashinfer.rope) attention_flops() (in module flashinfer.testing) attention_flops_with_actual_seq_lens() (in module flashinfer.testing) attention_tb_per_sec() (in module flashinfer.testing) attention_tb_per_sec_with_actual_seq_lens() (in module flashinfer.testing) attention_tflops_per_sec() (in module flashinfer.testing) attention_tflops_per_sec_with_actual_seq_lens() (in module flashinfer.testing) B batch_deepgemm_fp8_nt_groupwise() (in module flashinfer.gemm) BatchDecodeWithPagedKVCacheWrapper (class in flashinfer.decode) BatchDecodeWithSharedPrefixPagedKVCacheWrapper (class in flashinfer.cascade) BatchMLAPagedAttentionWrapper (class in flashinfer.mla) BatchPrefillWithPagedKVCacheWrapper (class in flashinfer.prefill) BatchPrefillWithRaggedKVCacheWrapper (class in flashinfer.prefill) BatchPrefillWithSharedPrefixPagedKVCacheWrapper (class in flashinfer.cascade) begin_forward() (flashinfer.cascade.BatchDecodeWithSharedPrefixPagedKVCacheWrapper method) (flashinfer.cascade.BatchPrefillWithSharedPrefixPagedKVCacheWrapper method) bench_gpu_time() (in module flashinfer.testing) bench_gpu_time_with_cudagraph() (in module flashinfer.testing) BlockSparseAttentionWrapper (class in flashinfer.sparse) bmm_fp8() (in module flashinfer.gemm) C chain_speculative_sampling() (in module flashinfer.sampling) convert_to_block_layout() (in module flashinfer.fused_moe) CUDAGraphBatchDecodeWithPagedKVCacheWrapper (class in flashinfer.decode) cudnn_batch_decode_with_kv_cache() (in module flashinfer.decode) cudnn_batch_prefill_with_kv_cache() (in module flashinfer.prefill) cutlass_fused_moe() (in module flashinfer.fused_moe) E e2m1_and_ufp8sf_scale_to_float() (in module flashinfer.fp4_quantization) end_forward() (flashinfer.cascade.BatchDecodeWithSharedPrefixPagedKVCacheWrapper method) (flashinfer.cascade.BatchPrefillWithSharedPrefixPagedKVCacheWrapper method) F forward() (flashinfer.cascade.BatchDecodeWithSharedPrefixPagedKVCacheWrapper method) (flashinfer.cascade.BatchPrefillWithSharedPrefixPagedKVCacheWrapper method) fp4_quantize() (in module flashinfer.fp4_quantization) fused_add_rmsnorm() (in module flashinfer.norm) G gelu_and_mul() (in module flashinfer.activation) gelu_tanh_and_mul() (in module flashinfer.activation) gemm_fp8_nt_groupwise() (in module flashinfer.gemm) gemma_fused_add_rmsnorm() (in module flashinfer.norm) gemma_rmsnorm() (in module flashinfer.norm) get_batch_indices_positions() (in module flashinfer.page) group_deepgemm_fp8_nt_groupwise() (in module flashinfer.gemm) group_gemm_fp8_nt_groupwise() (in module flashinfer.gemm) group_gemm_mxfp4_nt_groupwise() (in module flashinfer.gemm) L LogitsPipe (class in flashinfer.logits_processor) LogitsProcessor (class in flashinfer.logits_processor) M merge_state() (in module flashinfer.cascade) merge_state_in_place() (in module flashinfer.cascade) merge_states() (in module flashinfer.cascade) min_p_sampling_from_probs() (in module flashinfer.sampling) MinP (class in flashinfer.logits_processor) mm_fp4() (in module flashinfer.gemm) MultiLevelCascadeAttentionWrapper (class in flashinfer.cascade) N nvfp4_block_scale_interleave() (in module flashinfer.fp4_quantization) nvfp4_quantize() (in module flashinfer.fp4_quantization) P packbits() (in module flashinfer.quantization) plan() (flashinfer.cascade.MultiLevelCascadeAttentionWrapper method) (flashinfer.decode.BatchDecodeWithPagedKVCacheWrapper method) (flashinfer.mla.BatchMLAPagedAttentionWrapper method) (flashinfer.prefill.BatchPrefillWithPagedKVCacheWrapper method) (flashinfer.prefill.BatchPrefillWithRaggedKVCacheWrapper method) (flashinfer.sparse.BlockSparseAttentionWrapper method) (flashinfer.sparse.VariableBlockSparseAttentionWrapper method) R reorder_rows_for_gated_act_gemm() (in module flashinfer.fused_moe) reset_workspace_buffer() (flashinfer.cascade.BatchDecodeWithSharedPrefixPagedKVCacheWrapper method) (flashinfer.cascade.BatchPrefillWithSharedPrefixPagedKVCacheWrapper method) (flashinfer.cascade.MultiLevelCascadeAttentionWrapper method) (flashinfer.decode.BatchDecodeWithPagedKVCacheWrapper method) (flashinfer.gemm.SegmentGEMMWrapper method) (flashinfer.prefill.BatchPrefillWithPagedKVCacheWrapper method) (flashinfer.prefill.BatchPrefillWithRaggedKVCacheWrapper method) (flashinfer.sparse.BlockSparseAttentionWrapper method) (flashinfer.sparse.VariableBlockSparseAttentionWrapper method) rmsnorm() (in module flashinfer.norm) RoutingMethodType (class in flashinfer.fused_moe) run() (flashinfer.cascade.MultiLevelCascadeAttentionWrapper method) (flashinfer.decode.BatchDecodeWithPagedKVCacheWrapper method) (flashinfer.gemm.SegmentGEMMWrapper method) (flashinfer.mla.BatchMLAPagedAttentionWrapper method) (flashinfer.prefill.BatchPrefillWithPagedKVCacheWrapper method) (flashinfer.prefill.BatchPrefillWithRaggedKVCacheWrapper method) (flashinfer.sparse.BlockSparseAttentionWrapper method) (flashinfer.sparse.VariableBlockSparseAttentionWrapper method) S Sample (class in flashinfer.logits_processor) sampling_from_probs() (in module flashinfer.sampling) segment_packbits() (in module flashinfer.quantization) SegmentGEMMWrapper (class in flashinfer.gemm) set_seed() (in module flashinfer.testing) SfLayout (class in flashinfer.fp4_quantization) shuffle_matrix_a() (in module flashinfer.fp4_quantization) shuffle_matrix_sf_a() (in module flashinfer.fp4_quantization) silu_and_mul() (in module flashinfer.activation) single_decode_with_kv_cache() (in module flashinfer.decode) single_prefill_with_kv_cache() (in module flashinfer.prefill) single_prefill_with_kv_cache_return_lse() (in module flashinfer.prefill) sleep_after_kernel_run() (in module flashinfer.testing) Softmax (class in flashinfer.logits_processor) split_device_green_ctx() (in module flashinfer.green_ctx) split_device_green_ctx_by_sm_count() (in module flashinfer.green_ctx) T TaggedTensor (class in flashinfer.logits_processor) Temperature (class in flashinfer.logits_processor) TensorType (class in flashinfer.logits_processor) top_k_mask_logits() (in module flashinfer.sampling) top_k_renorm_probs() (in module flashinfer.sampling) top_k_sampling_from_probs() (in module flashinfer.sampling) top_k_top_p_sampling_from_logits() (in module flashinfer.sampling) top_k_top_p_sampling_from_probs() (in module flashinfer.sampling) top_p_renorm_probs() (in module flashinfer.sampling) top_p_sampling_from_probs() (in module flashinfer.sampling) TopK (class in flashinfer.logits_processor) TopP (class in flashinfer.logits_processor) trtllm_batch_context_with_kv_cache() (in module flashinfer.prefill) trtllm_batch_decode_with_kv_cache() (in module flashinfer.decode) trtllm_fp4_block_scale_moe() (in module flashinfer.fused_moe) trtllm_fp8_block_scale_moe() (in module flashinfer.fused_moe) trtllm_fp8_per_tensor_scale_moe() (in module flashinfer.fused_moe) V VariableBlockSparseAttentionWrapper (class in flashinfer.sparse) W WeightLayout (class in flashinfer.fused_moe)