o
    S"g                     @  sJ   d dl mZ d dlZd dlZd dlmZ d dlmZ G dd dejZdS )    )annotationsN)Image)nnc                      s   e Zd ZU dZded< d$d% fd
dZd&ddZed'ddZej	d(ddZd)ddZ
d*d+ddZed,ddZd-dd Zed.d"d#Z  ZS )/	CLIPModelTboolsave_in_rootopenai/clip-vit-base-patch32N
model_namestrreturnNonec                   s6   t    |d u r|}tj|| _tj|| _d S N)super__init__transformersr   from_pretrainedmodelCLIPProcessor	processor)selfr	   processor_name	__class__ h/mnt/skqttb/ctump_chatbot/chatbot/lib/python3.10/site-packages/sentence_transformers/models/CLIPModel.pyr      s
   
zCLIPModel.__init__c                 C  s   dS )NzCLIPModel()r   r   r   r   r   __repr__   s   zCLIPModel.__repr__intc                 C  s
   | j jjS r   r   	tokenizermodel_max_lengthr   r   r   r   max_seq_length      
zCLIPModel.max_seq_lengthvaluec                 C  s   || j j_d S r   r   )r   r#   r   r   r   r!      s   featuresdict[str, torch.Tensor]c              	   C  s   g }g }d|v r| j j|d d}| j |d }d|v rC| j j|d|dd |dd |dd |dd d	}| j |d }g }t|}t|}t|d
 D ]\}	}
|
dkrc|t	| qS|t	| qSt
| |d< |S )Npixel_values)r&      	input_idsattention_maskposition_idsoutput_attentionsoutput_hidden_states)r(   r)   r*   r+   r,   image_text_infor   sentence_embedding)r   vision_modelvisual_projection
text_modelgettext_projectioniter	enumerateappendnexttorchstackfloat)r   r$   image_embedstext_embedsvision_outputstext_outputsr.   image_featurestext_featuresidx
input_typer   r   r   forward    s.   



zCLIPModel.forwardpadding
str | boolc           
      C  s   g }g }g }t |D ]\}}t|tjr|| |d q
|| |d q
i }t|r:| jj||ddd}t|rK| jj|dd}	|	j|d< ||d< t	|S )	Nr   r'   Tpt)rD   
truncationreturn_tensors)rH   r&   r-   )
r5   
isinstancer   r6   lenr   r   image_processorr&   dict)
r   textsrD   imagestexts_valuesr-   rA   dataencodingr?   r   r   r   tokenize@   s"   


zCLIPModel.tokenizetransformers.CLIPProcessorc                 C  s   | j S r   )r   r   r   r   r   r   X   s   zCLIPModel.tokenizeroutput_pathc                 C  s   | j | | j| d S r   )r   save_pretrainedr   )r   rT   r   r   r   save\   s   zCLIPModel.save
input_pathc                 C  s
   t | dS )N)r	   )r   )rW   r   r   r   load`   r"   zCLIPModel.load)r   N)r	   r
   r   r   )r   r
   )r   r   )r#   r   r   r   )r$   r%   r   r%   )T)rD   rE   r   r%   )r   rS   )rT   r
   r   r   )rW   r
   r   r   )__name__
__module____qualname__r   __annotations__r   r   propertyr!   setterrC   rR   r   rV   staticmethodrX   __classcell__r   r   r   r   r   	   s   
 
	
 
r   )	
__future__r   r8   r   PILr   r   Moduler   r   r   r   r   <module>   s    