Bases: InductorPass
An inductor pass with access to vLLM PassConfig.
It provides timing, logging, and dumping utilities.
Source code in vllm/compilation/vllm_inductor_pass.py
| class VllmInductorPass(InductorPass):
"""
An inductor pass with access to vLLM PassConfig.
It provides timing, logging, and dumping utilities.
"""
def __init__(self, config: VllmConfig):
self.pass_config = config.compilation_config.pass_config
self.model_dtype = config.model_config.dtype if config.model_config \
else None
self.device = config.device_config.device if config.device_config \
else None
self.pass_name = self.__class__.__name__
def dump_graph(self, graph: torch.fx.Graph, stage: str, always=False):
lazy_format_graph_code(stage, graph.owning_module)
if stage in self.pass_config.dump_graph_stages or always:
# Make sure filename includes rank in the distributed setting
parallel = p_is_init() and get_tp_world_size() > 1
rank = f"-{get_tp_rank()}" if parallel else ""
filepath = self.pass_config.dump_graph_dir / f"{stage}{rank}.py"
logger.info("%s printing graph to %s", self.pass_name, filepath)
with open(filepath, "w") as f:
src = graph.python_code(root_module="self", verbose=True).src
# Add imports so it's not full of errors
print("import torch; from torch import device", file=f)
print(src, file=f)
def begin(self):
self._start_time = time.perf_counter_ns()
def end_and_log(self):
self._end_time = time.perf_counter_ns()
duration_ms = float(self._end_time - self._start_time) / 1.0e6
logger.debug("%s completed in %.1f ms", self.pass_name, duration_ms)
|
device
instance-attribute
device = device if device_config else None
model_dtype
instance-attribute
model_dtype = dtype if model_config else None
pass_config
instance-attribute
pass_config = pass_config
pass_name
instance-attribute
__init__
Source code in vllm/compilation/vllm_inductor_pass.py
| def __init__(self, config: VllmConfig):
self.pass_config = config.compilation_config.pass_config
self.model_dtype = config.model_config.dtype if config.model_config \
else None
self.device = config.device_config.device if config.device_config \
else None
self.pass_name = self.__class__.__name__
|
begin
Source code in vllm/compilation/vllm_inductor_pass.py
| def begin(self):
self._start_time = time.perf_counter_ns()
|
dump_graph
dump_graph(graph: Graph, stage: str, always=False)
Source code in vllm/compilation/vllm_inductor_pass.py
| def dump_graph(self, graph: torch.fx.Graph, stage: str, always=False):
lazy_format_graph_code(stage, graph.owning_module)
if stage in self.pass_config.dump_graph_stages or always:
# Make sure filename includes rank in the distributed setting
parallel = p_is_init() and get_tp_world_size() > 1
rank = f"-{get_tp_rank()}" if parallel else ""
filepath = self.pass_config.dump_graph_dir / f"{stage}{rank}.py"
logger.info("%s printing graph to %s", self.pass_name, filepath)
with open(filepath, "w") as f:
src = graph.python_code(root_module="self", verbose=True).src
# Add imports so it's not full of errors
print("import torch; from torch import device", file=f)
print(src, file=f)
|
end_and_log
Source code in vllm/compilation/vllm_inductor_pass.py
| def end_and_log(self):
self._end_time = time.perf_counter_ns()
duration_ms = float(self._end_time - self._start_time) / 1.0e6
logger.debug("%s completed in %.1f ms", self.pass_name, duration_ms)
|