# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Example: profile a vLLM generation run with the built-in torch profiler."""
import os
import time

from vllm import LLM, SamplingParams

# Enable the torch profiler; this can also be set on the command line.
os.environ["VLLM_TORCH_PROFILER_DIR"] = "./vllm_profile"

# Sample prompts.
prompts = [
    "Hello, my name is",
    "The president of the United States is",
    "The capital of France is",
    "The future of AI is",
]
# Create a sampling params object.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95)


def main():
    """Generate completions for the sample prompts under the profiler and print them."""
    # Create an LLM.
    llm = LLM(model="facebook/opt-125m", tensor_parallel_size=1)

    # Profile only the generation call itself. `generate` returns a list of
    # RequestOutput objects carrying the prompt, generated text, and metadata.
    llm.start_profile()
    outputs = llm.generate(prompts, sampling_params)
    llm.stop_profile()

    # Print each prompt alongside its generated continuation.
    separator = "-" * 50
    print(separator)
    for request_output in outputs:
        completion = request_output.outputs[0].text
        print(f"Prompt: {request_output.prompt!r}\nGenerated text: {completion!r}")
        print(separator)

    # Add a buffer to wait for profiler in the background process
    # (in case MP is on) to finish writing profiling output.
    time.sleep(10)


if __name__ == "__main__":
    main()