Index _ | A | B | C | E | F | G | L | M | N | P | R | S | T | V | W _ __init__() (flashinfer.cascade.BatchDecodeWithSharedPrefixPagedKVCacheWrapper method) (flashinfer.cascade.BatchPrefillWithSharedPrefixPagedKVCacheWrapper method) (flashinfer.cascade.MultiLevelCascadeAttentionWrapper method) (flashinfer.comm.AllReduceFusionOp method) (flashinfer.comm.AllReduceFusionPattern method) (flashinfer.comm.AllReduceStrategyConfig method) (flashinfer.comm.AllReduceStrategyType method) (flashinfer.comm.CudaRTLibrary method) (flashinfer.comm.Mapping method) (flashinfer.comm.mnnvl.McastGPUBuffer method) (flashinfer.comm.mnnvl.MnnvlMemory method) (flashinfer.decode.BatchDecodeWithPagedKVCacheWrapper method) (flashinfer.decode.CUDAGraphBatchDecodeWithPagedKVCacheWrapper method) (flashinfer.fp4_quantization.SfLayout method) (flashinfer.fused_moe.RoutingMethodType method) (flashinfer.fused_moe.WeightLayout method) (flashinfer.gemm.SegmentGEMMWrapper method) (flashinfer.logits_processor.LogitsPipe method) (flashinfer.logits_processor.LogitsProcessor method) (flashinfer.logits_processor.MinP method) (flashinfer.logits_processor.Sample method) (flashinfer.logits_processor.Softmax method) (flashinfer.logits_processor.TaggedTensor method) (flashinfer.logits_processor.Temperature method) (flashinfer.logits_processor.TensorType method) (flashinfer.logits_processor.TopK method) (flashinfer.logits_processor.TopP method) (flashinfer.mla.BatchMLAPagedAttentionWrapper method) (flashinfer.prefill.BatchPrefillWithPagedKVCacheWrapper method) (flashinfer.prefill.BatchPrefillWithRaggedKVCacheWrapper method) (flashinfer.sparse.BlockSparseAttentionWrapper method) (flashinfer.sparse.VariableBlockSparseAttentionWrapper method) A alloc_and_copy_to_cuda() (in module flashinfer.comm.mnnvl) AllReduceFusionOp (class in flashinfer.comm) AllReduceFusionPattern (class in flashinfer.comm) AllReduceStrategyConfig (class in flashinfer.comm) AllReduceStrategyType (class in flashinfer.comm) append_paged_kv_cache() (in module flashinfer.page) append_paged_mla_kv_cache() (in module flashinfer.page) apply_llama31_rope() (in module flashinfer.rope) apply_llama31_rope_inplace() (in module flashinfer.rope) apply_llama31_rope_pos_ids() (in module flashinfer.rope) apply_llama31_rope_pos_ids_inplace() (in module flashinfer.rope) apply_rope() (in module flashinfer.rope) apply_rope_inplace() (in module flashinfer.rope) apply_rope_pos_ids() (in module flashinfer.rope) apply_rope_pos_ids_inplace() (in module flashinfer.rope) apply_rope_with_cos_sin_cache() (in module flashinfer.rope) apply_rope_with_cos_sin_cache_inplace() (in module flashinfer.rope) attention_flops() (in module flashinfer.testing) attention_flops_with_actual_seq_lens() (in module flashinfer.testing) attention_tb_per_sec() (in module flashinfer.testing) attention_tb_per_sec_with_actual_seq_lens() (in module flashinfer.testing) attention_tflops_per_sec() (in module flashinfer.testing) attention_tflops_per_sec_with_actual_seq_lens() (in module flashinfer.testing) B batch_deepgemm_fp8_nt_groupwise() (in module flashinfer.gemm) BatchDecodeWithPagedKVCacheWrapper (class in flashinfer.decode) BatchDecodeWithSharedPrefixPagedKVCacheWrapper (class in flashinfer.cascade) BatchMLAPagedAttentionWrapper (class in flashinfer.mla) BatchPrefillWithPagedKVCacheWrapper (class in flashinfer.prefill) BatchPrefillWithRaggedKVCacheWrapper (class in flashinfer.prefill) BatchPrefillWithSharedPrefixPagedKVCacheWrapper (class in flashinfer.cascade) begin_forward() (flashinfer.cascade.BatchDecodeWithSharedPrefixPagedKVCacheWrapper method) (flashinfer.cascade.BatchPrefillWithSharedPrefixPagedKVCacheWrapper method) bench_gpu_time() (in module flashinfer.testing) bench_gpu_time_with_cuda_event() (in module flashinfer.testing) bench_gpu_time_with_cudagraph() (in module flashinfer.testing) bench_gpu_time_with_cupti() (in module flashinfer.testing) BlockSparseAttentionWrapper (class in flashinfer.sparse) bmm_fp8() (in module flashinfer.gemm) C chain_speculative_sampling() (in module flashinfer.sampling) compute_fp4_swizzled_layout_sf_size() (in module flashinfer.comm) convert_to_block_layout() (in module flashinfer.fused_moe) create_shared_buffer() (in module flashinfer.comm) create_tensor_from_cuda_memory() (in module flashinfer.comm.mnnvl) CUDAGraphBatchDecodeWithPagedKVCacheWrapper (class in flashinfer.decode) CudaRTLibrary (class in flashinfer.comm) cudnn_batch_decode_with_kv_cache() (in module flashinfer.decode) cudnn_batch_prefill_with_kv_cache() (in module flashinfer.prefill) cutlass_fused_moe() (in module flashinfer.fused_moe) E e2m1_and_ufp8sf_scale_to_float() (in module flashinfer.fp4_quantization) end_forward() (flashinfer.cascade.BatchDecodeWithSharedPrefixPagedKVCacheWrapper method) (flashinfer.cascade.BatchPrefillWithSharedPrefixPagedKVCacheWrapper method) F forward() (flashinfer.cascade.BatchDecodeWithSharedPrefixPagedKVCacheWrapper method) (flashinfer.cascade.BatchPrefillWithSharedPrefixPagedKVCacheWrapper method) fp4_quantize() (in module flashinfer.fp4_quantization) free_shared_buffer() (in module flashinfer.comm) fused_add_rmsnorm() (in module flashinfer.norm) G gelu_and_mul() (in module flashinfer.activation) gelu_tanh_and_mul() (in module flashinfer.activation) gemm_fp8_nt_groupwise() (in module flashinfer.gemm) gemma_fused_add_rmsnorm() (in module flashinfer.norm) gemma_rmsnorm() (in module flashinfer.norm) get_batch_indices_positions() (in module flashinfer.page) group_deepgemm_fp8_nt_groupwise() (in module flashinfer.gemm) group_gemm_fp8_nt_groupwise() (in module flashinfer.gemm) group_gemm_mxfp4_nt_groupwise() (in module flashinfer.gemm) L layernorm() (in module flashinfer.norm) LogitsPipe (class in flashinfer.logits_processor) LogitsProcessor (class in flashinfer.logits_processor) M Mapping (class in flashinfer.comm) McastGPUBuffer (class in flashinfer.comm.mnnvl) merge_state() (in module flashinfer.cascade) merge_state_in_place() (in module flashinfer.cascade) merge_states() (in module flashinfer.cascade) min_p_sampling_from_probs() (in module flashinfer.sampling) MinP (class in flashinfer.logits_processor) mm_fp4() (in module flashinfer.gemm) MnnvlMemory (class in flashinfer.comm.mnnvl) mpi_barrier() (in module flashinfer.comm.trtllm_mnnvl_ar) MultiLevelCascadeAttentionWrapper (class in flashinfer.cascade) N nvfp4_batched_quantize() (in module flashinfer.fp4_quantization) nvfp4_block_scale_interleave() (in module flashinfer.fp4_quantization) nvfp4_quantize() (in module flashinfer.fp4_quantization) P pack_strided_memory() (in module flashinfer.comm) packbits() (in module flashinfer.quantization) plan() (flashinfer.cascade.MultiLevelCascadeAttentionWrapper method) (flashinfer.decode.BatchDecodeWithPagedKVCacheWrapper method) (flashinfer.mla.BatchMLAPagedAttentionWrapper method) (flashinfer.prefill.BatchPrefillWithPagedKVCacheWrapper method) (flashinfer.prefill.BatchPrefillWithRaggedKVCacheWrapper method) (flashinfer.sparse.BlockSparseAttentionWrapper method) (flashinfer.sparse.VariableBlockSparseAttentionWrapper method) R reorder_rows_for_gated_act_gemm() (in module flashinfer.fused_moe) reset_workspace_buffer() (flashinfer.cascade.BatchDecodeWithSharedPrefixPagedKVCacheWrapper method) (flashinfer.cascade.BatchPrefillWithSharedPrefixPagedKVCacheWrapper method) (flashinfer.cascade.MultiLevelCascadeAttentionWrapper method) (flashinfer.decode.BatchDecodeWithPagedKVCacheWrapper method) (flashinfer.gemm.SegmentGEMMWrapper method) (flashinfer.prefill.BatchPrefillWithPagedKVCacheWrapper method) (flashinfer.prefill.BatchPrefillWithRaggedKVCacheWrapper method) (flashinfer.sparse.BlockSparseAttentionWrapper method) (flashinfer.sparse.VariableBlockSparseAttentionWrapper method) rmsnorm() (in module flashinfer.norm) RoutingMethodType (class in flashinfer.fused_moe) run() (flashinfer.cascade.MultiLevelCascadeAttentionWrapper method) (flashinfer.decode.BatchDecodeWithPagedKVCacheWrapper method) (flashinfer.gemm.SegmentGEMMWrapper method) (flashinfer.mla.BatchMLAPagedAttentionWrapper method) (flashinfer.prefill.BatchPrefillWithPagedKVCacheWrapper method) (flashinfer.prefill.BatchPrefillWithRaggedKVCacheWrapper method) (flashinfer.sparse.BlockSparseAttentionWrapper method) (flashinfer.sparse.VariableBlockSparseAttentionWrapper method) S Sample (class in flashinfer.logits_processor) sampling_from_probs() (in module flashinfer.sampling) segment_packbits() (in module flashinfer.quantization) SegmentGEMMWrapper (class in flashinfer.gemm) set_seed() (in module flashinfer.testing) SfLayout (class in flashinfer.fp4_quantization) shuffle_matrix_a() (in module flashinfer.fp4_quantization) shuffle_matrix_sf_a() (in module flashinfer.fp4_quantization) silu_and_mul() (in module flashinfer.activation) single_decode_with_kv_cache() (in module flashinfer.decode) single_prefill_with_kv_cache() (in module flashinfer.prefill) single_prefill_with_kv_cache_return_lse() (in module flashinfer.prefill) sleep_after_kernel_run() (in module flashinfer.testing) Softmax (class in flashinfer.logits_processor) split_device_green_ctx() (in module flashinfer.green_ctx) split_device_green_ctx_by_sm_count() (in module flashinfer.green_ctx) T TaggedTensor (class in flashinfer.logits_processor) Temperature (class in flashinfer.logits_processor) TensorType (class in flashinfer.logits_processor) top_k_mask_logits() (in module flashinfer.sampling) top_k_renorm_probs() (in module flashinfer.sampling) top_k_sampling_from_probs() (in module flashinfer.sampling) top_k_top_p_sampling_from_logits() (in module flashinfer.sampling) top_k_top_p_sampling_from_probs() (in module flashinfer.sampling) top_p_renorm_probs() (in module flashinfer.sampling) top_p_sampling_from_probs() (in module flashinfer.sampling) TopK (class in flashinfer.logits_processor) TopP (class in flashinfer.logits_processor) trtllm_allreduce_fusion() (in module flashinfer.comm) trtllm_batch_context_with_kv_cache() (in module flashinfer.prefill) trtllm_batch_decode_with_kv_cache() (in module flashinfer.decode) trtllm_create_ipc_workspace_for_all_reduce() (in module flashinfer.comm) trtllm_create_ipc_workspace_for_all_reduce_fusion() (in module flashinfer.comm) trtllm_custom_all_reduce() (in module flashinfer.comm) trtllm_destroy_ipc_workspace_for_all_reduce() (in module flashinfer.comm) trtllm_destroy_ipc_workspace_for_all_reduce_fusion() (in module flashinfer.comm) trtllm_fp4_block_scale_moe() (in module flashinfer.fused_moe) trtllm_fp8_block_scale_moe() (in module flashinfer.fused_moe) trtllm_fp8_per_tensor_scale_moe() (in module flashinfer.fused_moe) trtllm_lamport_initialize() (in module flashinfer.comm) trtllm_lamport_initialize_all() (in module flashinfer.comm) trtllm_mnnvl_all_reduce() (in module flashinfer.comm.trtllm_mnnvl_ar) trtllm_mnnvl_fused_allreduce_rmsnorm() (in module flashinfer.comm.trtllm_mnnvl_ar) trtllm_moe_allreduce_fusion() (in module flashinfer.comm) trtllm_moe_finalize_allreduce_fusion() (in module flashinfer.comm) V VariableBlockSparseAttentionWrapper (class in flashinfer.sparse) vllm_all_reduce() (in module flashinfer.comm) vllm_dispose() (in module flashinfer.comm) vllm_get_graph_buffer_ipc_meta() (in module flashinfer.comm) vllm_init_custom_ar() (in module flashinfer.comm) vllm_meta_size() (in module flashinfer.comm) vllm_register_buffer() (in module flashinfer.comm) vllm_register_graph_buffers() (in module flashinfer.comm) W WeightLayout (class in flashinfer.fused_moe)