
vllm.entrypoints.pooling.classify.protocol

ClassificationRequest module-attribute

ClassificationRequest = (
    ClassificationCompletionRequest | ClassificationChatRequest
)

ClassificationChatRequest

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/pooling/classify/protocol.py
class ClassificationChatRequest(OpenAIBaseModel):
    model: str | None = None
    messages: list[ChatCompletionMessageParam]
    truncate_prompt_tokens: Annotated[int, Field(ge=-1)] | None = None
    user: str | None = None

    # --8<-- [start:chat-classification-extra-params]
    add_generation_prompt: bool = Field(
        default=False,
        description=(
            "If true, the generation prompt will be added to the chat template. "
            "This is a parameter used by chat template in tokenizer config of the "
            "model."
        ),
    )

    add_special_tokens: bool = Field(
        default=False,
        description=(
            "If true, special tokens (e.g. BOS) will be added to the prompt "
            "on top of what is added by the chat template. "
            "For most models, the chat template takes care of adding the "
            "special tokens so this should be set to false (as is the "
            "default)."
        ),
    )

    chat_template: str | None = Field(
        default=None,
        description=(
            "A Jinja template to use for this conversion. "
            "As of transformers v4.44, default chat template is no longer "
            "allowed, so you must provide a chat template if the tokenizer "
            "does not define one."
        ),
    )

    chat_template_kwargs: dict[str, Any] | None = Field(
        default=None,
        description=(
            "Additional keyword args to pass to the template renderer. "
            "Will be accessible by the chat template."
        ),
    )

    mm_processor_kwargs: dict[str, Any] | None = Field(
        default=None,
        description=("Additional kwargs to pass to the HF processor."),
    )

    priority: int = Field(
        default=0,
        description=(
            "The priority of the request (lower means earlier handling; "
            "default: 0). Any priority other than 0 will raise an error "
            "if the served model does not use priority scheduling."
        ),
    )

    request_id: str = Field(
        default_factory=random_uuid,
        description=(
            "The request_id related to this request. If the caller does "
            "not set it, a random_uuid will be generated. This id is used "
            "through out the inference process and return in response."
        ),
    )
    softmax: bool | None = Field(
        default=None,
        description="softmax will be deprecated, please use use_activation instead.",
    )

    activation: bool | None = Field(
        default=None,
        description="activation will be deprecated, please use use_activation instead.",
    )

    use_activation: bool | None = Field(
        default=None,
        description="Whether to use activation for classification outputs. "
        "Default is True.",
    )
    # --8<-- [end:chat-classification-extra-params]

    def to_pooling_params(self):
        return PoolingParams(
            truncate_prompt_tokens=self.truncate_prompt_tokens,
            use_activation=get_use_activation(self),
        )
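
For orientation, a minimal client-side sketch of a chat classification call. It assumes a vLLM server running a sequence-classification model and serving the classification route at /classify; the base URL and model name below are placeholders, not values defined in this module.

import requests

# Placeholder URL and model name; adjust to your deployment.
payload = {
    "model": "my-classifier",
    "messages": [{"role": "user", "content": "The battery life is terrible."}],
    "priority": 0,
}
resp = requests.post("http://localhost:8000/classify", json=payload)
resp.raise_for_status()
print(resp.json()["data"])  # one ClassificationData-shaped dict per input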

activation class-attribute instance-attribute

activation: bool | None = Field(
    default=None,
    description="activation will be deprecated, please use use_activation instead.",
)

add_generation_prompt class-attribute instance-attribute

add_generation_prompt: bool = Field(
    default=False,
    description="If true, the generation prompt will be added to the chat template. This is a parameter used by chat template in tokenizer config of the model.",
)

add_special_tokens class-attribute instance-attribute

add_special_tokens: bool = Field(
    default=False,
    description="If true, special tokens (e.g. BOS) will be added to the prompt on top of what is added by the chat template. For most models, the chat template takes care of adding the special tokens so this should be set to false (as is the default).",
)

chat_template class-attribute instance-attribute

chat_template: str | None = Field(
    default=None,
    description="A Jinja template to use for this conversion. As of transformers v4.44, default chat template is no longer allowed, so you must provide a chat template if the tokenizer does not define one.",
)

chat_template_kwargs class-attribute instance-attribute

chat_template_kwargs: dict[str, Any] | None = Field(
    default=None,
    description="Additional keyword args to pass to the template renderer. Will be accessible by the chat template.",
)

messages instance-attribute

messages: list[ChatCompletionMessageParam]

mm_processor_kwargs class-attribute instance-attribute

mm_processor_kwargs: dict[str, Any] | None = Field(
    default=None,
    description="Additional kwargs to pass to the HF processor.",
)

model class-attribute instance-attribute

model: str | None = None

priority class-attribute instance-attribute

priority: int = Field(
    default=0,
    description="The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.",
)

request_id class-attribute instance-attribute

request_id: str = Field(
    default_factory=random_uuid,
    description="The request_id related to this request. If the caller does not set it, a random_uuid will be generated. This id is used through out the inference process and return in response.",
)

softmax class-attribute instance-attribute

softmax: bool | None = Field(
    default=None,
    description="softmax will be deprecated, please use use_activation instead.",
)

truncate_prompt_tokens class-attribute instance-attribute

truncate_prompt_tokens: (
    Annotated[int, Field(ge=-1)] | None
) = None

use_activation class-attribute instance-attribute

use_activation: bool | None = Field(
    default=None,
    description="Whether to use activation for classification outputs. Default is True.",
)

user class-attribute instance-attribute

user: str | None = None

to_pooling_params

to_pooling_params()
Source code in vllm/entrypoints/pooling/classify/protocol.py
def to_pooling_params(self):
    return PoolingParams(
        truncate_prompt_tokens=self.truncate_prompt_tokens,
        use_activation=get_use_activation(self),
    )
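
The method forwards the request's truncation limit and its resolved activation flag into a PoolingParams. A small sketch of that round trip, assuming get_use_activation prefers an explicit use_activation over the deprecated activation and softmax fields:

from vllm.entrypoints.pooling.classify.protocol import (
    ClassificationChatRequest,
)

req = ClassificationChatRequest(
    messages=[{"role": "user", "content": "Classify this sentence."}],
    truncate_prompt_tokens=512,
    use_activation=False,  # assumed to win over deprecated softmax/activation
)
params = req.to_pooling_params()
print(params.truncate_prompt_tokens, params.use_activation)  # 512 False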

ClassificationCompletionRequest

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/pooling/classify/protocol.py
class ClassificationCompletionRequest(OpenAIBaseModel):
    model: str | None = None
    input: list[str] | str
    truncate_prompt_tokens: Annotated[int, Field(ge=-1)] | None = None
    user: str | None = None

    # --8<-- [start:classification-extra-params]
    priority: int = Field(
        default=0,
        description=(
            "The priority of the request (lower means earlier handling; "
            "default: 0). Any priority other than 0 will raise an error "
            "if the served model does not use priority scheduling."
        ),
    )
    add_special_tokens: bool = Field(
        default=True,
        description=(
            "If true (the default), special tokens (e.g. BOS) will be added to "
            "the prompt."
        ),
    )
    request_id: str = Field(
        default_factory=random_uuid,
        description=(
            "The request_id related to this request. If the caller does "
            "not set it, a random_uuid will be generated. This id is used "
            "through out the inference process and return in response."
        ),
    )
    softmax: bool | None = Field(
        default=None,
        description="softmax will be deprecated, please use use_activation instead.",
    )

    activation: bool | None = Field(
        default=None,
        description="activation will be deprecated, please use use_activation instead.",
    )

    use_activation: bool | None = Field(
        default=None,
        description="Whether to use activation for classification outputs. "
        "Default is True.",
    )
    # --8<-- [end:classification-extra-params]

    def to_pooling_params(self):
        return PoolingParams(
            truncate_prompt_tokens=self.truncate_prompt_tokens,
            use_activation=get_use_activation(self),
        )
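
Unlike the chat variant, this request carries raw text in input, either a single string or a batch of strings. A minimal sketch against a hypothetical local server, again assuming the /classify route:

import requests

payload = {
    "model": "my-classifier",                     # placeholder model name
    "input": ["great product", "awful service"],  # a single string also works
    "truncate_prompt_tokens": 128,
}
resp = requests.post("http://localhost:8000/classify", json=payload)
resp.raise_for_status()
for item in resp.json()["data"]:
    print(item["index"], item["label"], max(item["probs"]))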

activation class-attribute instance-attribute

activation: bool | None = Field(
    default=None,
    description="activation will be deprecated, please use use_activation instead.",
)

add_special_tokens class-attribute instance-attribute

add_special_tokens: bool = Field(
    default=True,
    description="If true (the default), special tokens (e.g. BOS) will be added to the prompt.",
)

input instance-attribute

input: list[str] | str

model class-attribute instance-attribute

model: str | None = None

priority class-attribute instance-attribute

priority: int = Field(
    default=0,
    description="The priority of the request (lower means earlier handling; default: 0). Any priority other than 0 will raise an error if the served model does not use priority scheduling.",
)

request_id class-attribute instance-attribute

request_id: str = Field(
    default_factory=random_uuid,
    description="The request_id related to this request. If the caller does not set it, a random_uuid will be generated. This id is used through out the inference process and return in response.",
)

softmax class-attribute instance-attribute

softmax: bool | None = Field(
    default=None,
    description="softmax will be deprecated, please use use_activation instead.",
)

truncate_prompt_tokens class-attribute instance-attribute

truncate_prompt_tokens: (
    Annotated[int, Field(ge=-1)] | None
) = None

use_activation class-attribute instance-attribute

use_activation: bool | None = Field(
    default=None,
    description="Whether to use activation for classification outputs. Default is True.",
)

user class-attribute instance-attribute

user: str | None = None

to_pooling_params

to_pooling_params()
Source code in vllm/entrypoints/pooling/classify/protocol.py
def to_pooling_params(self):
    return PoolingParams(
        truncate_prompt_tokens=self.truncate_prompt_tokens,
        use_activation=get_use_activation(self),
    )

ClassificationData

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/pooling/classify/protocol.py
class ClassificationData(OpenAIBaseModel):
    index: int
    label: str | None
    probs: list[float]
    num_classes: int
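
Each element of a response's data list follows this schema, one entry per classified input. A quick illustration with made-up values (OpenAIBaseModel is a Pydantic model, so model_validate applies):

from vllm.entrypoints.pooling.classify.protocol import ClassificationData

item = ClassificationData.model_validate(
    {"index": 0, "label": "positive", "probs": [0.1, 0.9], "num_classes": 2}
)
print(item.label, max(item.probs))  # positive 0.9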

index instance-attribute

index: int

label instance-attribute

label: str | None

num_classes instance-attribute

num_classes: int

probs instance-attribute

probs: list[float]

ClassificationResponse

Bases: OpenAIBaseModel

Source code in vllm/entrypoints/pooling/classify/protocol.py
class ClassificationResponse(OpenAIBaseModel):
    id: str = Field(default_factory=lambda: f"classify-{random_uuid()}")
    object: str = "list"
    created: int = Field(default_factory=lambda: int(time.time()))
    model: str
    data: list[ClassificationData]
    usage: UsageInfo
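
A complete response can be validated the same way. A sketch with fabricated values; the usage fields shown are an assumption about UsageInfo's schema, not something defined in this module:

from vllm.entrypoints.pooling.classify.protocol import (
    ClassificationResponse,
)

raw = {
    "model": "my-classifier",
    "data": [
        {"index": 0, "label": "positive", "probs": [0.1, 0.9], "num_classes": 2}
    ],
    "usage": {"prompt_tokens": 7, "total_tokens": 7},  # assumed UsageInfo fields
}
resp = ClassificationResponse.model_validate(raw)
print(resp.id, resp.object, resp.data[0].label)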

created class-attribute instance-attribute

created: int = Field(default_factory=lambda: int(time.time()))

data instance-attribute

data: list[ClassificationData]

id class-attribute instance-attribute

id: str = Field(
    default_factory=lambda: f"classify-{random_uuid()}"
)

model instance-attribute

model: str

object class-attribute instance-attribute

object: str = 'list'

usage instance-attribute

usage: UsageInfo