Profile which parts of inference of the BigBird model dominate runtime on the GLUE/CoLA dataset.
The result can be reproduced by running kernprof -l -v profile_bottleneck\inference_pipeline.py
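The contents of profile_bottleneck\inference_pipeline.py are not reproduced in this issue; the sketch below is only an assumption of what such a driver could look like, using an assumed google/bigbird-roberta-base checkpoint and the GLUE/CoLA validation split. kernprof injects the @profile builtin at run time, and the decorator is applied to BigBirdLayer.forward in modeling_big_bird.py (as the output below shows), so the driver itself only needs to run inference.

```python
# Hypothetical driver in the spirit of profile_bottleneck\inference_pipeline.py
# (the real script is not shown in this issue). Checkpoint name, padding length
# and per-example batching are assumptions.
import torch
from datasets import load_dataset
from transformers import BigBirdForSequenceClassification, BigBirdTokenizerFast

model_name = "google/bigbird-roberta-base"  # assumed checkpoint
tokenizer = BigBirdTokenizerFast.from_pretrained(model_name)
model = BigBirdForSequenceClassification.from_pretrained(model_name).eval()

# GLUE/CoLA: single-sentence acceptability classification.
cola = load_dataset("glue", "cola", split="validation")

with torch.no_grad():
    for example in cola:
        inputs = tokenizer(
            example["sentence"],
            return_tensors="pt",
            padding="max_length",
            truncation=True,
            max_length=1024,  # long enough for BigBird's block-sparse attention
        )
        _ = model(**inputs)
```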
Timer unit: 1e-06 s
Total time: 41.1655 s
File: C:\Users\MSI\Desktop\Azalia_transformer\transformers\src\transformers\models\big_bird\modeling_big_bird.py
Function: forward at line 1472
Line # Hits Time Per Hit % Time Line Contents
==============================================================
1472 @profile
1473 def forward(
1474 self,
1475 hidden_states,
1476 attention_mask=None,
1477 head_mask=None,
1478 encoder_hidden_states=None,
1479 encoder_attention_mask=None,
1480 band_mask=None,
1481 from_mask=None,
1482 to_mask=None,
1483 blocked_encoder_mask=None,
1484 past_key_value=None,
1485 output_attentions=False,
1486 ):
1487 # decoder uni-directional self-attention cached key/values tuple is at positions 1,2
1488 6000 9052.0 1.5 0.0 self_attn_past_key_value = past_key_value[:2] if past_key_value is not None else None
1489 6000 16753623.2 2792.3 40.7 self_attention_outputs = self.attention(
1490 6000 4530.6 0.8 0.0 hidden_states,
1491 6000 5060.3 0.8 0.0 attention_mask,
1492 6000 4035.0 0.7 0.0 head_mask,
1493 6000 4193.9 0.7 0.0 encoder_hidden_states=encoder_hidden_states,
1494 6000 4677.3 0.8 0.0 encoder_attention_mask=encoder_attention_mask,
1495 6000 3811.4 0.6 0.0 past_key_value=self_attn_past_key_value,
1496 6000 3928.3 0.7 0.0 output_attentions=output_attentions,
1497 6000 4026.9 0.7 0.0 band_mask=band_mask,
1498 6000 3992.7 0.7 0.0 from_mask=from_mask,
1499 6000 4089.0 0.7 0.0 to_mask=to_mask,
1500 6000 3629.0 0.6 0.0 from_blocked_mask=blocked_encoder_mask,
1501 6000 3344.9 0.6 0.0 to_blocked_mask=blocked_encoder_mask,
1502 )
1503 6000 7824.1 1.3 0.0 attention_output = self_attention_outputs[0]
1504
1505 # if decoder, the last output is tuple of self-attn cache
1506 6000 13634.4 2.3 0.0 if self.is_decoder:
1507 outputs = self_attention_outputs[1:-1]
1508 present_key_value = self_attention_outputs[-1]
1509 else:
1510 6000 7762.8 1.3 0.0 outputs = self_attention_outputs[1:] # add self attentions if we output attention weights
1511
1512 6000 4584.1 0.8 0.0 cross_attn_present_key_value = None
1513 6000 7343.2 1.2 0.0 if self.is_decoder and encoder_hidden_states is not None:
1514 if not hasattr(self, "crossattention"):
1515 raise ValueError(
1516 f"If `encoder_hidden_states` are passed, {self} has to be instantiated with "
1517 " cross-attention layers by setting `config.add_cross_attention=True`"
1518 )
1519
1520 # cross_attn cached key/values tuple is at positions 3,4 of past_key_value tuple
1521 cross_attn_past_key_value = past_key_value[-2:] if past_key_value is not None else None
1522 cross_attention_outputs = self.crossattention(
1523 attention_output,
1524 attention_mask,
1525 head_mask,
1526 encoder_hidden_states,
1527 encoder_attention_mask,
1528 cross_attn_past_key_value,
1529 output_attentions,
1530 )
1531 attention_output = cross_attention_outputs[0]
1532 outputs = outputs + cross_attention_outputs[1:-1] # add cross attentions if we output attention weights
1533
1534 # add cross-attn cache to positions 3,4 of present_key_value tuple
1535 cross_attn_present_key_value = cross_attention_outputs[-1]
1536 present_key_value = present_key_value + cross_attn_present_key_value
1537
1538 6000 24264013.6 4044.0 58.9 layer_output = apply_chunking_to_forward(
1539 6000 13603.3 2.3 0.0 self.feed_forward_chunk, self.chunk_size_feed_forward, self.seq_len_dim, attention_output
1540 )
1541
1542 6000 15522.8 2.6 0.0 outputs = (layer_output,) + outputs
1543
1544 # if decoder, return the attn key/values as the last output
1545 6000 14400.9 2.4 0.0 if self.is_decoder:
1546 outputs = outputs + (present_key_value,)
1547
1548 6000 4782.3 0.8 0.0 return outputs
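Two lines account for nearly all of the layer's cost: the block-sparse self-attention call at about 40.7% of the total time, and the feed-forward sublayer invoked through apply_chunking_to_forward at about 58.9%. apply_chunking_to_forward is the transformers utility that can split the feed-forward input along the sequence dimension to reduce peak memory; a minimal sketch of its behaviour, simplified from transformers.pytorch_utils and not the exact implementation, is:

```python
# Simplified illustration of transformers' apply_chunking_to_forward (not the
# exact implementation). In BigBirdLayer, forward_fn is self.feed_forward_chunk.
import torch

def apply_chunking_to_forward_sketch(forward_fn, chunk_size, chunk_dim, *input_tensors):
    if chunk_size > 0:
        # Split every input along chunk_dim, run the FFN chunk by chunk,
        # then concatenate; this trades a little speed for lower peak memory.
        num_chunks = input_tensors[0].shape[chunk_dim] // chunk_size
        chunked = [t.chunk(num_chunks, dim=chunk_dim) for t in input_tensors]
        outputs = [forward_fn(*chunk) for chunk in zip(*chunked)]
        return torch.cat(outputs, dim=chunk_dim)
    # chunk_size == 0 (the transformers default): a single call on the full
    # tensors, so the wrapper adds essentially no overhead of its own.
    return forward_fn(*input_tensors)
```

Since config.chunk_size_feed_forward defaults to 0, the 58.9% measured on that line is effectively the cost of self.feed_forward_chunk itself (the intermediate and output dense projections plus LayerNorm), not chunking overhead.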