
    ge                        d Z ddlZddlmZ ddlmZmZmZ ddlZ	ddl
mZ ddlmZmZ ddlmZmZ dd	lmZ dd
lmZmZ ddlmZ  ej        e          Ze G d de                      Z G d de	j        j        j                  Z  G d de	j        j        j                  Z! G d de	j        j        j                  Z" G d de	j        j        j                  Z# G d de	j        j        j                  Z$ G d de          Z%dS )zOTF IdeficsVision model: a copy of CLIPVisionModel using a simpler config object    N)	dataclass)OptionalTupleUnion   )get_tf_activation)TFBaseModelOutputTFBaseModelOutputWithPooling)TFPreTrainedModel
shape_list)flatten)ModelOutputlogging   )IdeficsVisionConfigc                       e Zd ZU dZdZeej                 ed<   dZ	ej        ed<   dZ
eeej                          ed<   dZeeej                          ed<   dS )TFIdeficsVisionModelOutputa  
    Base class for vision model's outputs that also contains image embeddings of the pooling of the last hidden states.

    Args:
        image_embeds (`tf.Tensor` of shape `(batch_size, output_dim)` *optional* returned when model is initialized with `with_projection=True`):
            The image embeddings obtained by applying the projection layer to the pooler_output.
        last_hidden_state (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
            Sequence of hidden-states at the output of the last layer of the model.
        hidden_states (`tuple(tf.Tensor)`, *optional*, returned when `output_hidden_states=True` is passed or when `config.output_hidden_states=True`):
            Tuple of `tf.Tensor` (one for the output of the embeddings, if the model has an embedding layer, +
            one for the output of each layer) of shape `(batch_size, sequence_length, hidden_size)`.

            Hidden-states of the model at the output of each layer plus the optional initial embedding outputs.
        attentions (`tuple(tf.Tensor)`, *optional*, returned when `output_attentions=True` is passed or when `config.output_attentions=True`):
            Tuple of `tf.Tensor` (one for each layer) of shape `(batch_size, num_heads, sequence_length,
            sequence_length)`.

            Attentions weights after the attention softmax, used to compute the weighted average in the self-attention
            heads.
    Nimage_embedslast_hidden_statehidden_states
attentions)__name__
__module____qualname____doc__r   r   tfTensor__annotations__r   r   r   r        a/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/idefics/vision_tf.pyr   r   "   s          * )-L(29%,,,#'ry'''04M8E"),-444-1Jry)*11111r    r   c                        e Zd Zdef fdZdej        dededej        fdZdd	ej        d
e	dej        fdZ
ddZ xZS )TFIdeficsVisionEmbeddingsconfigc           	          t                      j        d
i | || _        |j        | _        |j        | _        |j        | _        t          j        j	        
                    | j        | j        | j        dddd          | _        | j        | j        z  dz  | _        | j        dz   | _        t          j        j	                            | j        | j        d	          | _        d S )NFvalidchannels_lastpatch_embedding)filterskernel_sizestridesuse_biaspaddingdata_formatname   r   position_embeddingr/   r   )super__init__r$   hidden_size	embed_dim
image_size
patch_sizer   keraslayersConv2Dr(   num_patchesnum_positions	Embeddingr1   selfr$   kwargs	__class__s      r!   r4   z"TFIdeficsVisionEmbeddings.__init__@   s    ""6"""+ + +!x55NO'"  6  
  
 !Ot>1D!-1"$(/";";5I #< #
 #
r    
embeddingsheightwidthreturnc           	         t          |          d         dz
  }|                     | j                  }t          |          d         dz
  }||k    r||k    r|S |d d df         }|d d dd f         }t          |          d         }	|| j        j        z  }
|| j        j        z  }|
dz   |dz   }}
t          j        t          |                    }t          j	        |dt          |          t          |          |	f          }|
|z  }||z  }t          j        t          j        |          d         t          j                  }t          j        t          j        |          d         t          j                  }t          j        ||z  t          j                  }t          j        ||z  t          j                  }t          j                            |||gt          j        j        j                  }t          |
          t          |          d         k    s&t          |          t          |          d         k    rZt'          d	t          |
          t          |          f d
t          |          d         t          |          d         f d          t          j	        |dd|	f          }t          j        |t          j        d d f         |fd          S )Nr   r   g?r0   )sizemethodzNumber of patches for images (z/) don't match the shape of position embedding ()axis)r   r1   position_idsr$   r8   mathsqrtfloatr   reshapeintcastshapefloat32int32imageresizeResizeMethodBICUBIC
ValueErrorconcatnewaxis)r@   rC   rD   rE   r<   	pos_embedr=   class_pos_embedpatch_pos_embedr6   num_h_patchesnum_w_patchessqrt_num_positionsscale_heightscale_widthoriginal_heightoriginal_width
new_height	new_widths                      r!   interpolate_pos_encodingz2TFIdeficsVisionEmbeddings.interpolate_pos_encodingX   s    ,,Q/!3++D,=>>	"9--a014-''FeOO#AAAqD/#AAAqrrE*z**2.	$+"88!77'4s':MC<O}!Yu]';';<<*_q#>P:Q:QSVWiSjSjlu6vww$'99#&88'"(?";";A">
KK/!:!:1!=rzJJW_|;RXFF
GN[8"(CC	(//:y"9"(BWB_ * 
 

 *_"="=b"AAA=!!Z%@%@%DDDt]1C1CSEWEW0X t t0:?0K0KB0OQ[\kQlQlmoQp/qt t t   *_q"i6HIIy/"*aaa-8/JQRSSSSr    Fpixel_valuesrm   c                    t          |t                    r|d         }t          j        |d          }t	          |          \  }}}}|s<|| j        k    s|| j        k    r&t          d| d| d| j         d| j         d	          |                     |          }t          |dd	          }t          j	        | j
        t          j        t          j        d d f         |d| j        g          }t          j        ||gd
          }	|r|	|                     |	||          z   }	n|	|                     | j                  z   }	|	S )Nrn   )r   r0   r   r   permzInput image size (*z) doesn't match model (z8). You should try to set `interpolate_pos_encoding=True`r   r0   rN   )
isinstancedictr   	transposer   r7   r^   r(   r   broadcast_toclass_embeddingr`   r6   r_   rm   r1   rP   )
r@   rn   rm   
batch_sizerD   rE   num_channelspatch_embedsclass_embedsrC   s
             r!   callzTFIdeficsVisionEmbeddings.call   s   
 lD)) 	8'7L|L|DDD2<\2J2J/
FE<' 	((ET_,D,D u u u% u uu u+/?u u u  
 ++L99 |Q22 RZ!:;j!T^=\
 
 Yl;!DDD
 $ 	Q#d&C&CJPVX]&^&^^JJ#d&=&=d>O&P&PPJr    Nc                 x   | j         rd S d| _         t          j        | j        d          t          j        d d f         | _        |                     | j        fd          | _        t          | dd           ^t          j
        | j        j                  5  | j                            d d d | j        j        g           d d d            n# 1 swxY w Y   t          | dd           St          j
        | j        j                  5  | j                            d            d d d            d S # 1 swxY w Y   d S d S )NTzself.position_idsr2   rw   )rW   r/   r(   r1   )builtr   ranger=   r`   rP   
add_weightr6   rw   getattr
name_scoper(   r/   buildr$   ry   r1   r@   input_shapes     r!   r   zTFIdeficsVisionEmbeddings.build   s   : 	F
HT%7>QRRRSUS]_`_`_`S`a#dn5FM^__4*D11=t3899 Y Y$**D$dk>V+WXXXY Y Y Y Y Y Y Y Y Y Y Y Y Y Y4-t44@t6;<< 4 4'--d3334 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 A@s$   )CCCD--D14D1FN)r   r   r   r   r4   r   r   rU   rm   boolr|   r   __classcell__rB   s   @r!   r#   r#   ?   s        
2 
 
 
 
 
 
0%T29 %Tc %TRU %TZ\Zc %T %T %T %TN! ! !d !WYW` ! ! ! !F4 4 4 4 4 4 4 4r    r#   c                       e Zd ZdZ fdZdej        dedefdZ	 	 	 dd	ej        d
e	ej                 de	ej                 de	e
         deej        e	ej                 e	eej                          f         f
dZddZ xZS )TFIdeficsVisionAttentionz=Multi-headed attention from 'Attention Is All You Need' paperc                     t                      j        d
i | || _        |j        | _        |j        | _        | j        | j        z  | _        | j        | j        z  | j        k    r t          d| j         d| j         d          | j        dz  | _	        |j
        | _        t          j        j                            | j        d          | _        t          j        j                            | j        d          | _        t          j        j                            | j        d          | _        t          j        j                            | j        d	          | _        d S )Nz;embed_dim must be divisible by num_heads (got `embed_dim`: z and `num_heads`: z).g      k_projr2   v_projq_projout_projr   )r3   r4   r$   r5   r6   num_attention_heads	num_headshead_dimr^   scaleattention_dropoutdropoutr   r9   r:   Denser   r   r   r   r?   s      r!   r4   z!TFIdeficsVisionAttention.__init__   s8   ""6"""+3$.8=4>)T^;;'dn ' 'N' ' '   ]D(
/ho++DN+JJho++DN+JJho++DN+JJ--dn:-NNr    tensorseq_lenbszc           	      t    t          j        t          j        |||| j        | j        f          g d          S )Nr   r0   r   r   rp   )r   ru   rT   r   r   )r@   r   r   r   s       r!   _shapezTFIdeficsVisionAttention._shape   s7    |BJvWdndm/\]]dpdpdpqqqqr    NFr   attention_maskcausal_attention_maskoutput_attentionsrF   c           
         t          |          \  }}}|                     |          | j        z  }|                     |                     |          d|          }	|                     |                     |          d|          }
|| j        z  d| j        f}t          j	        |                     |||          |          }t          j	        |	|          }	t          j	        |
|          }
t          |	          d         }t          j
                            ||	d          }t          j                            t          j        |          || j        z  ||gd|| j        z  ||g dt          j        |                      |~t          |          |d||gk    r&t          d	|d||f dt          |                     t          j	        ||| j        ||f          |z   }t          j	        ||| j        z  ||f          }|~t          |          |d||gk    r&t          d	|d||f dt          |                     t          j	        ||| j        ||f          |z   }t          j	        ||| j        z  ||f          }t          j                            |d
          }|r?t          j	        ||| j        ||f          }t          j	        ||| j        z  ||f          }nd}t          j                            || j                  }t          j
                            ||
          }t          j                            t          j        |          || j        z  || j        gd|| j        z  || j        g dt          j        |                      t          j	        ||| j        || j        f          }t          j        |g d          }t          j	        ||||f          }|                     |          }||fS )z#Input shape: Batch x Time x ChannelrH   r   T)transpose_bz$Attention weights should be of size z	, but is )messageNz!Attention mask should be of size rN   )rater   rp   )r   r   r   r   r   r   r   r   r   rT   linalgmatmul	debuggingassert_equalrW   r^   nnsoftmaxr   ru   r   )r@   r   r   r   r   r   tgt_lenr6   query_states
key_statesvalue_states
proj_shapesrc_lenattn_weightsattn_weights_reshaped
attn_probsattn_outputs                    r!   r|   zTFIdeficsVisionAttention.call   sa    #-]";";Wi {{=11DJ>[[]!;!;REE
{{4;;}#=#=r3GGDN*B>
z$++lGS"I"I:VVZ
J77
z,
;;Z((+y''jd'SS
!!H\""4>!7G4 GC$.<PRY[b;c  G  Gnpnv  xD  oE  oE  G  G 	" 	
 	
 	
 !,/00S!Wg4NNN <a'8R < <"#899< <   :lS$.'SZ4[\\_ttL:lS4>5I7T[4\]]L%.))c1gw-GGG ya'8Ryy]ghv]w]wyy   :lS$.'SZ4[\\_mmL:lS4>5I7T[4\]]Lu}}\};; 	)
 %'J|c4>SZ\c=d$e$e!:&;cDN>RT[]d=effLL$(!U]]<dl]CC
i&&z<@@
!!H[!!4>!7DM: LC$.<PRY[_[h;i  L  Ltvt|  ~I  uJ  uJ  L  L 	" 	
 	
 	
 jsDNGT].[\\l;\\\BBBjsGY.GHHmmK00111r    c                    | j         rd S d| _         t          | dd           \t          j        | j        j                  5  | j                            | j        | j        f           d d d            n# 1 swxY w Y   t          | dd           \t          j        | j        j                  5  | j                            | j        | j        f           d d d            n# 1 swxY w Y   t          | dd           \t          j        | j	        j                  5  | j	                            | j        | j        f           d d d            n# 1 swxY w Y   t          | dd           _t          j        | j
        j                  5  | j
                            | j        | j        f           d d d            d S # 1 swxY w Y   d S d S )NTr   r   r   r   )r~   r   r   r   r   r/   r   r6   r   r   r   r   s     r!   r   zTFIdeficsVisionAttention.build  s   : 	F
44((4t{/00 D D!!4>4>"BCCCD D D D D D D D D D D D D D D44((4t{/00 D D!!4>4>"BCCCD D D D D D D D D D D D D D D44((4t{/00 D D!!4>4>"BCCCD D D D D D D D D D D D D D D4T**6t}122 F F##T^T^$DEEEF F F F F F F F F F F F F F F F F F 76sH    'A33A7:A7-'C  C$'C$'EEE'F;;F?F?)NNFr   )r   r   r   r   r4   r   r   rU   r   r   r   r   r|   r   r   r   s   @r!   r   r      s%       GGO O O O O&rRY r r3 r r r r /359,1L2 L2yL2 !+L2  (	2	L2
 $D>L2 
ry(29-xbi8H/II	JL2 L2 L2 L2\F F F F F F F Fr    r   c                   J     e Zd Z fdZdej        dej        fdZddZ xZS )TFIdeficsVisionMLPc                 :    t                      j        di | || _        t          |j                  | _        t          j        j        	                    |j
        d          | _        t          j        j        	                    |j        d          | _        d S )Nfc1r2   fc2r   )r3   r4   r$   r   
hidden_actactivation_fnr   r9   r:   r   intermediate_sizer   r5   r   r?   s      r!   r4   zTFIdeficsVisionMLP.__init__*  s    ""6""".v/@AA8?(()A(NN8?(();%(HHr    r   rF   c                     |                      |          }|                     |          }|                     |          }|S r   )r   r   r   )r@   r   s     r!   r|   zTFIdeficsVisionMLP.call1  s=    //**=99//r    Nc                    | j         rd S d| _         t          | dd           Zt          j        | j        j                  5  | j                            | j        j                   d d d            n# 1 swxY w Y   t          | dd           ]t          j        | j	        j                  5  | j	                            | j        j
                   d d d            d S # 1 swxY w Y   d S d S )NTr   r   )r~   r   r   r   r   r/   r   r$   r5   r   r   r   s     r!   r   zTFIdeficsVisionMLP.build7  sW   : 	F
4%%1tx}-- 8 8t{67778 8 8 8 8 8 8 8 8 8 8 8 8 8 84%%1tx}-- > >t{<===> > > > > > > > > > > > > > > > > > 21s$    %A11A58A5+%CC!$C!r   )	r   r   r   r4   r   r   r|   r   r   r   s   @r!   r   r   )  sw        I I I I I") 	    	> 	> 	> 	> 	> 	> 	> 	>r    r   c                        e Zd Zdef fdZ	 ddej        dej        dej        dee         de	ej                 f
d	Z
ddZ xZS )TFIdeficsVisionEncoderLayerr$   c                 j    t                      j        di | |j        | _        t	          |d          | _        t          j        j        	                    |j
        d          | _        t          |d          | _        t          j        j        	                    |j
        d          | _        d S )N	self_attnr2   layer_norm1epsilonr/   mlplayer_norm2r   )r3   r4   r5   r6   r   r   r   r9   r:   LayerNormalizationlayer_norm_epsr   r   r   r   r?   s      r!   r4   z$TFIdeficsVisionEncoderLayer.__init__D  s    ""6"""+1&{KKK8?==fF[bo=pp%f59998?==fF[bo=ppr    Fr   r   r   r   rF   c                     |}|                      |          }|                     ||||          \  }}||z   }|}|                     |          }|                     |          }||z   }|f}|r||fz  }|S )a9  
        Args:
            hidden_states (`tf.Tensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
            attention_mask (`tf.Tensor`): attention mask of size
                `(batch, 1, tgt_len, src_len)` where padding elements are indicated by very large negative values.
                `(config.encoder_attention_heads,)`.
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                returned tensors for more detail.
        )r   r   r   r   )r   r   r   r   )r@   r   r   r   r   residualr   outputss           r!   r|   z TFIdeficsVisionEncoderLayer.callL  s    " !((77&*nn')"7/	 '5 '
 '
#| !=0 ((77// =0 " 	'&Gr    Nc                    | j         rd S d| _         t          | dd           Xt          j        | j        j                  5  | j                            d d | j        g           d d d            n# 1 swxY w Y   t          | dd           [t          j        | j        j                  5  | j                            d d | j        g           d d d            d S # 1 swxY w Y   d S d S )NTr   r   )	r~   r   r   r   r   r/   r   r6   r   r   s     r!   r   z!TFIdeficsVisionEncoderLayer.buildt  s   : 	F
4--9t/455 E E &&dDN'CDDDE E E E E E E E E E E E E E E4--9t/455 E E &&dDN'CDDDE E E E E E E E E E E E E E E E E E :9s$    #A//A36A3)#CC Cr   r   )r   r   r   r   r4   r   r   r   r   r   r|   r   r   r   s   @r!   r   r   C  s        q2 q q q q q q -2& &y& 	&  "y	&
 $D>& 
ry	& & & &P	E 	E 	E 	E 	E 	E 	E 	Er    r   c                        e Zd ZdZdef fdZ	 	 	 	 	 	 ddeej                 deej                 dee	         dee	         d	ee	         d
ee	         de
eef         fdZddZ xZS )TFIdeficsVisionEncoderz
    Transformer encoder consisting of `config.num_hidden_layers` self attention layers. Each layer is a
    [`TFIdeficsVisionEncoderLayer`].

    Args:
        config: IdeficsVisionConfig
    r$   c                      t                      j        di | | _        fdt          j                  D             | _        d| _        d S )Nc                 8    g | ]}t          d |           S )zlayers.r2   )r   ).0ir$   s     r!   
<listcomp>z3TFIdeficsVisionEncoder.__init__.<locals>.<listcomp>  s:     
 
 
HI']q]]CCC
 
 
r    Fr   )r3   r4   r$   r   num_hidden_layersr:   gradient_checkpointingr?   s    ` r!   r4   zTFIdeficsVisionEncoder.__init__  sn    ""6"""
 
 
 
MRSYSkMlMl
 
 
 ',###r    Nr   r   r   output_hidden_statesreturn_dicttrainingrF   c                    n| j         j        ||n| j         j        }||n| j         j        }|rdnd}rdnd}	|}
t	          | j                  D ]a\  }}|r||
fz   }| j        r(|r&fd}t          j         ||          |
||          }n ||
||          }|d         }
r|	|d         fz   }	b|r||
fz   }|st          d |
||	fD                       S t          |
||	          S )	a  
        Args:
            inputs_embeds (`tf.Tensor` of shape `(batch_size, sequence_length, hidden_size)`):
                Optionally, instead of passing `input_ids` you can choose to directly pass an embedded representation.
                This is useful if you want more control over how to convert `input_ids` indices into associated vectors
                than the model's internal embedding lookup matrix.
            attention_mask (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
                Mask to avoid performing attention on padding token indices. Mask values selected in `[0, 1]`:

                - 1 for tokens that are **not masked**,
                - 0 for tokens that are **masked**.

                [What are attention masks?](../glossary#attention-mask)
            causal_attention_mask (`tf.Tensor` of shape `(batch_size, sequence_length)`, *optional*):
                Causal mask for the text model. Mask values selected in `[0, 1]`:

                - 1 for tokens that are **not masked**,
                - 0 for tokens that are **masked**.

                [What are attention masks?](../glossary#attention-mask)
            output_attentions (`bool`, *optional*):
                Whether or not to return the attentions tensors of all attention layers. See `attentions` under
                returned tensors for more detail.
            output_hidden_states (`bool`, *optional*):
                Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors
                for more detail.
            return_dict (`bool`, *optional*):
                Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
        Nr   c                       fd}|S )Nc                       g | R  S r   r   )inputsmoduler   s    r!   custom_forwardzRTFIdeficsVisionEncoder.call.<locals>.create_custom_forward.<locals>.custom_forward  s    %vAvA/@AAAAr    r   )r   r   r   s   ` r!   create_custom_forwardz:TFIdeficsVisionEncoder.call.<locals>.create_custom_forward  s0    B B B B B B *)r    )r   r   r   c              3      K   | ]}||V  	d S r   r   )r   vs     r!   	<genexpr>z.TFIdeficsVisionEncoder.call.<locals>.<genexpr>  s(      eeqWXWdWdWdWdWdeer    )r   r   r   )r$   r   r   use_return_dict	enumerater:   r   r   recompute_gradtupler	   )r@   inputs_embedsr   r   r   r   r   r   encoder_statesall_attentionsr   idxencoder_layerr   layer_outputss       `          r!   r|   zTFIdeficsVisionEncoder.call  s   N 2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B]3=0:d%"+DK"8"8 	F 	FC# C!/=2B!B* x * * * * * !# 1))-88!")	! ! !.!")&7	! ! ! *!,M  F!/=3C2E!E 	?+}.>>N 	fee]NN$Seeeeee +>Vd
 
 
 	
r    c                     | j         rd S d| _         t          | dd           P| j        D ]J}t          j        |j                  5  |                    d            d d d            n# 1 swxY w Y   Id S d S )NTr:   )r~   r   r:   r   r   r/   r   )r@   r   layers      r!   r   zTFIdeficsVisionEncoder.build  s    : 	F
44((4 & &]5:.. & &KK%%%& & & & & & & & & & & & & & & 54& &s   A&&A*	-A*	)NNNNNNr   )r   r   r   r   r   r4   r   r   r   r   r   r   r	   r|   r   r   r   s   @r!   r   r     s        ,2 , , , , , , /359,0/3&*#'V
 V
 !+V
  (	2	V

 $D>V
 'tnV
 d^V
 4.V
 
u''	(V
 V
 V
 V
p& & & & & & & &r    r   c                        e Zd Zdef fdZ	 	 	 	 	 	 ddeej                 dee         dee         dee         d	ee         d
ee         de	e
ef         fdZddZ xZS )TFIdeficsVisionTransformerr$   c                 z    t                      j        |fi | || _        |j        | _        t          |d          | _        t          j        j	        
                    |j        d          | _        t          |d          | _        t          j        j	        
                    |j        d          | _        d S )NrC   r2   pre_layrnormr   encoderpost_layernorm)r3   r4   r$   r5   r6   r#   rC   r   r9   r:   r   r   r   r   r   r   r?   s      r!   r4   z#TFIdeficsVisionTransformer.__init__  s    **6***+3FNNNHO>>vG\cq>rr-f9EEE ho@@I^eu@vvr    NFrn   r   r   rm   r   r   rF   c                    ||n| j         j        }||n| j         j        }||n| j         j        }|t	          d          |                     ||          }|                     |          }|                     |||||          }|d         }	|	dddddf         }
|                     |
          }
|s|	|
f|dd         z   S t          |	|
|j
        |j                  S )z
        Returns:

        Nz You have to specify pixel_values)rm   )r   r   r   r   r   r   r   )r   pooler_outputr   r   )r$   r   r   r   r^   rC   r   r   r   r
   r   r   )r@   rn   r   r   rm   r   r   r   encoder_outputsr   pooled_outputs              r!   r|   zTFIdeficsVisionTransformer.call  s2    2C1N--TXT_Tq$8$D  $+Jj 	 &1%<kk$+B]?@@@Oghh))-88,,'/!5# ' 
 
 ,A.)!!!Q'2++M:: 	L%}58KKK+/')7&1	
 
 
 	
r    c                 R   | j         rd S d| _         t          | dd           Pt          j        | j        j                  5  | j                            d            d d d            n# 1 swxY w Y   t          | dd           Xt          j        | j        j                  5  | j                            d d | j        g           d d d            n# 1 swxY w Y   t          | dd           Pt          j        | j	        j                  5  | j	                            d            d d d            n# 1 swxY w Y   t          | dd           Zt          j        | j
        j                  5  | j
                            d | j        g           d d d            d S # 1 swxY w Y   d S d S )NTrC   r   r   r   )r~   r   r   r   rC   r/   r   r   r6   r   r   r   s     r!   r   z TFIdeficsVisionTransformer.build-  s   : 	F
4t,,8t344 , ,%%d+++, , , , , , , , , , , , , , ,4..:t0566 F F!''tT^(DEEEF F F F F F F F F F F F F F F4D))5t|011 ) )""4((() ) ) ) ) ) ) ) ) ) ) ) ) ) )4)400<t2788 B B#))4*@AAAB B B B B B B B B B B B B B B B B B =<sH    A''A+.A+!#CCC
D11D58D5+"FF!F)NNNFNFr   )r   r   r   r   r4   r   r   r   r   r   r   r
   r|   r   r   r   s   @r!   r   r     s        w2 w w w w w w -1,0/338&*#(,
 ,
ry),
 $D>,
 'tn	,

 #+4.,
 d^,
 4.,
 
u22	3,
 ,
 ,
 ,
\B B B B B B B Br    r   )&r   rQ   dataclassesr   typingr   r   r   
tensorflowr   activations_tfr   modeling_tf_outputsr	   r
   modeling_tf_utilsr   r   tf_utilsr   utilsr   r   configuration_ideficsr   
get_loggerr   loggerr   r9   r:   Layerr#   r   r   r   r   r   r   r    r!   <module>r	     s   V U  ! ! ! ! ! ! ) ) ) ) ) ) ) ) ) )     / / / / / / R R R R R R R R > > > > > > > >       ) ) ) ) ) ) ) ) 6 6 6 6 6 6 
	H	%	% 2 2 2 2 2 2 2 28n4 n4 n4 n4 n4 5 n4 n4 n4bvF vF vF vF vFrx4 vF vF vFr> > > > >. > > >4:E :E :E :E :E"(/"7 :E :E :Ezp& p& p& p& p&RX_2 p& p& p&fIB IB IB IB IB!2 IB IB IB IB IBr    