
    g                     ^    d Z ddlmZ ddlmZ  ej        e          Z G d de          ZdS )zUDOP model configuration   )PretrainedConfig)loggingc                   x     e Zd ZdZdZdgZddddZdd	d
ddddddddiddiddigddddddddd	dddf fd	Z xZS )
UdopConfiga  
    This is the configuration class to store the configuration of a [`UdopForConditionalGeneration`]. It is used to
    instantiate a UDOP model according to the specified arguments, defining the model architecture. Instantiating a
    configuration with the defaults will yield a similar configuration to that of the UDOP
    [microsoft/udop-large](https://huggingface.co/microsoft/udop-large) architecture.

    Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
    documentation from [`PretrainedConfig`] for more information.

    Arguments:
        vocab_size (`int`, *optional*, defaults to 33201):
            Vocabulary size of the UDOP model. Defines the number of different tokens that can be represented by the
            `inputs_ids` passed when calling [`UdopForConditionalGeneration`].
        d_model (`int`, *optional*, defaults to 1024):
            Size of the encoder layers and the pooler layer.
        d_kv (`int`, *optional*, defaults to 64):
            Size of the key, query, value projections per attention head. The `inner_dim` of the projection layer will
            be defined as `num_heads * d_kv`.
        d_ff (`int`, *optional*, defaults to 4096):
            Size of the intermediate feed forward layer in each `UdopBlock`.
        num_layers (`int`, *optional*, defaults to 24):
            Number of hidden layers in the Transformer encoder and decoder.
        num_decoder_layers (`int`, *optional*):
            Number of hidden layers in the Transformer decoder. Will use the same value as `num_layers` if not set.
        num_heads (`int`, *optional*, defaults to 16):
            Number of attention heads for each attention layer in the Transformer encoder and decoder.
        relative_attention_num_buckets (`int`, *optional*, defaults to 32):
            The number of buckets to use for each attention layer.
        relative_attention_max_distance (`int`, *optional*, defaults to 128):
            The maximum distance of the longer sequences for the bucket separation.
        relative_bias_args (`List[dict]`, *optional*, defaults to `[{'type': '1d'}, {'type': 'horizontal'}, {'type': 'vertical'}]`):
            A list of dictionaries containing the arguments for the relative bias layers.
        dropout_rate (`float`, *optional*, defaults to 0.1):
            The ratio for all dropout layers.
        layer_norm_epsilon (`float`, *optional*, defaults to 1e-06):
            The epsilon used by the layer normalization layers.
        initializer_factor (`float`, *optional*, defaults to 1.0):
            A factor for initializing all weight matrices (should be kept to 1, used internally for initialization
            testing).
        feed_forward_proj (`string`, *optional*, defaults to `"relu"`):
            Type of feed forward layer to be used. Should be one of `"relu"` or `"gated-gelu"`. Udopv1.1 uses the
            `"gated-gelu"` feed forward projection. Original Udop uses `"relu"`.
        is_encoder_decoder (`bool`, *optional*, defaults to `True`):
            Whether the model should behave as an encoder/decoder or not.
        use_cache (`bool`, *optional*, defaults to `True`):
            Whether or not the model should return the last key/values attentions (not used by all models).
        pad_token_id (`int`, *optional*, defaults to 0):
            The id of the padding token in the vocabulary.
        eos_token_id (`int`, *optional*, defaults to 1):
            The id of the end-of-sequence token in the vocabulary.
        max_2d_position_embeddings (`int`, *optional*, defaults to 1024):
            The maximum absolute position embeddings for relative position encoding.
        image_size (`int`, *optional*, defaults to 224):
            The size of the input images.
        patch_size (`int`, *optional*, defaults to 16):
            The patch size used by the vision encoder.
        num_channels (`int`, *optional*, defaults to 3):
            The number of channels in the input images.
    udoppast_key_valuesd_model	num_heads
num_layers)hidden_sizenum_attention_headsnum_hidden_layersi  i   @   i      N          type1d
horizontalverticalg?gư>g      ?reluT          r   c                    || _         || _        || _        || _        || _        ||n| j        | _        || _        || _        |	| _        || _	        || _
        || _        || _        || _        || _        || _        || _        || _        t%          |
t&                    st)          d          |
| _        | j                            d          }|d         | _        |d         dk    | _        t3          |          dk    r|d         dk    st3          |          dk    rt5          d| d	           t7                      j        d|||d
| d S )Nz6`relative_bias_args` should be a list of dictionaries.-r   gatedr      z`feed_forward_proj`: z is not a valid activation function of the dense layer.Please make sure `feed_forward_proj` is of the format `gated-{ACT_FN}` or `{ACT_FN}`, e.g. 'gated-gelu' or 'relu')pad_token_ideos_token_idis_encoder_decoder )
vocab_sizer	   d_kvd_ffr   num_decoder_layersr
   relative_attention_num_bucketsrelative_attention_max_distancedropout_ratelayer_norm_epsiloninitializer_factorfeed_forward_proj	use_cachemax_2d_position_embeddings
image_size
patch_sizenum_channels
isinstancelist	TypeErrorrelative_bias_argssplitdense_act_fnis_gated_actlen
ValueErrorsuper__init__)selfr%   r	   r&   r'   r   r(   r
   r)   r*   r7   r+   r,   r-   r.   r#   r/   r!   r"   r0   r1   r2   r3   kwargsact_info	__class__s                            g/var/www/html/ai-engine/env/lib/python3.11/site-packages/transformers/models/udop/configuration_udop.pyr>   zUdopConfig.__init__Y   s   4 %		$"4"@do 	 #.L+/N,("4"4!2" +E'$$(,d33 	VTUUU"4)//44$RL$QK72x==1!!7!73x==1;L;L)(9 ) ) )   	 	
%%1	
 	
 		
 	
 	
 	
 	
    )	__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferenceattribute_mapr>   __classcell__)rB   s   @rC   r   r      s        : :x J#4"5$-khtuuM ')(+#TNV\,BVZDXY #'/D
 D
 D
 D
 D
 D
 D
 D
 D
 D
rD   r   N)	rH   configuration_utilsr   utilsr   
get_loggerrE   loggerr   r$   rD   rC   <module>rQ      s      3 3 3 3 3 3       
	H	%	%E
 E
 E
 E
 E
! E
 E
 E
 E
 E
rD   