Commit 2d357a9
committed
File tree
549 files changed
+128085
-33077
lines changed- .github
- actions
- scripts
- workflows
- .gitlab
- scripts
- stages
- docker
- common
- docs
- developer
- discussions
- megatron-fsdp-user-guide
- example-scripts
- images/fine_grained_activation_offloading
- user-guide/features
- examples
- inference/gpt
- multimodal
- llama_3p1_nemotron_nano_vl_8b_v1
- nvlm
- radio
- post_training/modelopt
- conf/nvidia
- megatron
- core
- datasets
- dist_checkpointing/strategies
- distributed
- fsdp
- src
- megatron_fsdp
- extensions
- inference
- contexts
- attention_context
- engines
- text_generation_server/dynamic_text_gen_server
- endpoints
- models
- T5
- bert
- common
- embeddings
- language_module
- gpt
- heterogeneous
- mamba
- multimodal
- retro
- optimizer
- cpu_offloading
- pipeline_parallel
- resharding
- ssm
- tensor_parallel
- transformer
- experimental_attention_variant
- moe
- post_training
- rl
- inference
- server/inference
- training
- datasets
- tokenizer
- tests
- functional_tests
- python_test_utils
- shell_test_utils
- test_cases
- bert
- bert_mcore_tp1_pp2
- bert_mcore_tp1_pp4_vp2
- bert_mcore_tp2_pp2_local_spec
- bert_mcore_tp2_pp2_resume_torch_dist_local_spec
- bert_mcore_tp2_pp2_resume_torch_dist
- bert_mcore_tp2_pp2
- bert_mcore_tp4_pp1
- gpt
- gpt3_7b_tp1_pp4_memory_speed
- gpt3_7b_tp4_pp1_memory_speed
- gpt3_mcore_reruns_resume_check_grads
- gpt3_mcore_te_tp1_pp1_dist_optimizer_fim_dataset
- gpt3_mcore_te_tp1_pp1_dist_optimizer_no_mmap_bin_files
- gpt3_mcore_te_tp1_pp1_resume_torch_dist_dist_optimizer
- gpt3_mcore_te_tp1_pp1_resume_torch_dist_uniform_full_recompute
- gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings_interleaved_no_fusion
- gpt3_mcore_te_tp1_pp2_resume_torch_dist_rope_embeddings
- gpt3_mcore_te_tp1_pp4_resume_torch_dist_disable_bias_linear
- gpt3_mcore_te_tp1_pp4_resume_torch_dist_persistent_disable_bias_linear
- gpt3_mcore_te_tp1_pp4_resume_torch_dist_swiglu
- gpt3_mcore_te_tp1_pp4_resume_torch_dist_untie_embeddings_and_outputs
- gpt3_mcore_te_tp1_pp4_vp1_dist_optimizer_overlap_grad_reduce_param_gather_overlap_optimizer
- gpt3_mcore_te_tp1_pp4_vp1_resume_torch_decoupled_lr
- gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_calculate_per_token_loss
- gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather
- gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_untied
- gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce
- gpt3_mcore_te_tp1_pp4_vp1_resume_torch_dist_tunable_overlap
- gpt3_mcore_te_tp1_pp4_vp1_tunable_overlap
- gpt3_mcore_te_tp1_pp4_vp1_uneven_pipeline
- gpt3_mcore_te_tp1_pp4_vp1
- gpt3_mcore_te_tp1_pp4_vp2_account_for_embedding_loss_in_pipeline_split
- gpt3_mcore_te_tp2_pp1_gdn
- gpt3_mcore_te_tp2_pp1_resume_torch_dist_cp2_nondeterministic
- gpt3_mcore_te_tp2_pp1_resume_torch_dist_multi_dist_optimizer_instances
- gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss_nondeterministic
- gpt3_mcore_te_tp2_pp2_cp2_calculate_per_token_loss
- gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_dp_last
- gpt3_mcore_te_tp2_pp2_cp2_etp4_calculate_per_token_loss_nondeterministic_dp_last
- gpt3_mcore_te_tp2_pp2_cp2_etp4_dp_last
- gpt3_mcore_te_tp2_pp2_cp2_etp4_nondeterministic_dp_last
- gpt3_mcore_te_tp2_pp2_cp2_nondeterministic
- gpt3_mcore_te_tp2_pp2_cp2
- gpt3_mcore_te_tp2_pp2_cross_entropy_loss_fusion
- gpt3_mcore_te_tp2_pp2_mla
- gpt3_mcore_te_tp2_pp2_resume_torch_dist_cp2_nondeterministic
- gpt3_mcore_te_tp2_pp2_resume_torch_dist_cross_entropy_loss_fusion
- gpt3_mcore_te_tp2_pp2_resume_torch_dist_ddp_average_in_collective
- gpt3_mcore_te_tp2_pp2_resume_torch_dist_defer_embedding_wgrad_compute
- gpt3_mcore_te_tp2_pp2_resume_torch_dist_no_create_attention_mask_in_dataloader
- gpt3_mcore_te_tp2_pp2_resume_torch_dist_reshard_1x4xNone
- gpt3_mcore_te_tp2_pp2_resume_torch_dist
- gpt3_mcore_te_tp2_zp_z3_resume_fsdp_dtensor
- gpt3_mcore_te_tp4_pp1_dist_optimizer_overlap_grad_reduce_param_gather
- gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather
- gpt3_mcore_te_tp4_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce
- gpt3_mcore_te_tp4_pp1_resume_torch_dist_qk_layernorm_test_mode
- gpt3_mcore_te_tp4_pp2_resume_torch_dist_reshard_8x1xNone
- gpt3_mcore_tp1_pp1_resume_torch_dist_dist_optimizer_overlap_grad_reduce_param_gather
- gpt3_mcore_tp1_pp2_resume_torch_dist
- gpt3_mcore_tp1_pp2
- gpt3_mcore_tp1_pp4_resume_torch_dist
- gpt3_mcore_tp1_pp4
- gpt3_mcore_tp4_pp1_resume_torch_dist
- gpt3_mcore_tp4_pp1_resume_torch
- gpt3_weekly_dgx_h100_mcore_tp2_pp2_current_scaling_native_fp8_tp_pp_sp_tp_overlap
- gpt_grpo_tp1_pp1_dp8_583m_throughputtest_github
- gpt_grpo_tp1_pp1_dp8_583m_throughputtest
- gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest_github
- gpt_grpo_tp1tp2_pp1_dp8_583m_throughputtest
- gpt_grpo_tp2tp1_pp4pp2_dp8_583m_throughputtest
- hybrid
- hybrid_dynamic_inference_tp1_pp1_dp8_583m_chunked_prefill
- hybrid_mr_mcore_te_tp1_pp1_cp1_dgx_a100_1N8G
- hybrid_mr_mcore_te_tp2_pp1_cp1_dgx_a100_1N8G
- hybrid_mr_mcore_te_tp2_pp1_cp4_dgx_a100_1N8G
- moe
- gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic_dp_last
- gpt3_mcore_cp2_pp2_ep2_te_4experts2parallel_nondeterministic
- gpt3_mcore_te_tp1_pp2_resume_torch_dist_reshard_2x1x4_te_8experts2parallel_dist_optimizer
- gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_dist_optimizer
- gpt3_mcore_te_tp2_pp1_resume_torch_dist_te_8experts2parallel_multi_dist_optimizer_instances
- gpt3_mcore_te_tp2_pp1_te_8experts2parallel_ddp_average_in_collective
- gpt3_mcore_te_tp2_pp1_te_8experts2parallel_overlap_grad_reduce_param_gather_groupedGEMM
- gpt3_mcore_te_tp2_pp1_te_8experts_etp1_ep4
- gpt3_mcore_te_tp2_pp1_te_a2a_ovlp_8experts_etp1_ep4
- gpt3_mcore_te_tp2_pp2_ep4_etp1_memory_speed
- gpt3_mcore_te_tp2_pp2_ep4_etp1_resume_torch_dist_attn_cudagraph
- gpt3_mcore_te_tp2_zp_z3_resume_torch_dist_te_8experts2parallel_top2router
- gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel_dp_last
- gpt3_mcore_tp2_cp2_pp2_ep2_te_4experts2parallel
- gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel_dp_last
- gpt3_mcore_tp2_pp2_ep2_etp2_te_4experts2parallel
- gpt3_mcore_tp2_pp2_ep2_te_4experts2parallel
- gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_muon
- gpt3_moe_mcore_te_ep8_resume_torch_dist_dist_optimizer
- gpt3_moe_mcore_te_ep8_resume_torch_dist_muon
- gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_fine_grained_offloading
- gpt3_moe_mcore_te_tp2_pp2_ep4_etp1_no_mtp_no_a2a_ovlp_fine_grained_offloading
- gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_resume_torch_dist_dist_optimizer
- gpt3_moe_mcore_te_tp4_ep2_etp2_pp2_scoped_cudagraph
- gpt_grpo_tp8tp4_pp1_ep8ep2_dp8_throughputtest
- multimodal-llava
- multimodal_llava_mcore_te_tp1_pp1
- multimodal_llava_mcore_te_tp4_sp_cp2
- t5
- t5_11b_mcore_tp4_pp1
- t5_mcore_te_tp1_pp1_vp1_resume_torch
- t5_mcore_te_tp2_pp1_vp1_sequence_parallel
- t5_mcore_te_tp2_pp1_vp1
- t5_mcore_te_tp4_pp1_resume_torch_dist
- t5_mcore_te_tp4_pp1
- t5_mcore_tp1_pp1_vp1_resume_torch
- t5_mcore_tp1_pp1_vp1
- t5_mcore_tp2_pp1_vp1
- t5_mcore_tp4_pp1_resume_torch_dist
- t5_mcore_tp4_pp1
- test_utils
- python_scripts
- recipes
- unit_tests
- a2a_overlap
- dist_checkpointing
- distributed
- fsdp
- inference
- contexts
- engines
- models
- pipeline_parallel
- post_training
- ssm
- tensor_parallel
- transformer
- moe
- tools
Some content is hidden
Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
549 files changed
+128085
-33077
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
11 | 11 | | |
12 | 12 | | |
13 | 13 | | |
14 | | - | |
15 | | - | |
| 14 | + | |
| 15 | + | |
16 | 16 | | |
17 | 17 | | |
18 | 18 | | |
19 | | - | |
| 19 | + | |
20 | 20 | | |
21 | 21 | | |
22 | | - | |
| 22 | + | |
23 | 23 | | |
24 | | - | |
| 24 | + | |
25 | 25 | | |
26 | | - | |
| 26 | + | |
27 | 27 | | |
28 | 28 | | |
29 | | - | |
| 29 | + | |
30 | 30 | | |
31 | | - | |
| 31 | + | |
32 | 32 | | |
33 | | - | |
| 33 | + | |
34 | 34 | | |
35 | | - | |
| 35 | + | |
36 | 36 | | |
37 | 37 | | |
38 | 38 | | |
| |||
43 | 43 | | |
44 | 44 | | |
45 | 45 | | |
46 | | - | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
| 49 | + | |
47 | 50 | | |
48 | 51 | | |
49 | 52 | | |
50 | | - | |
| 53 | + | |
51 | 54 | | |
52 | 55 | | |
53 | 56 | | |
| |||
77 | 80 | | |
78 | 81 | | |
79 | 82 | | |
| 83 | + | |
80 | 84 | | |
81 | 85 | | |
82 | 86 | | |
| |||
119 | 123 | | |
120 | 124 | | |
121 | 125 | | |
| 126 | + | |
122 | 127 | | |
123 | 128 | | |
124 | | - | |
| 129 | + | |
| 130 | + | |
125 | 131 | | |
126 | 132 | | |
127 | 133 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
1 | 1 | | |
2 | 2 | | |
3 | 3 | | |
4 | | - | |
| 4 | + | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
1 | 1 | | |
2 | 2 | | |
3 | | - | |
4 | | - | |
| 3 | + | |
| 4 | + | |
5 | 5 | | |
6 | 6 | | |
7 | | - | |
8 | | - | |
| 7 | + | |
| 8 | + | |
9 | 9 | | |
10 | 10 | | |
11 | | - | |
12 | | - | |
| 11 | + | |
| 12 | + | |
| 13 | + | |
| 14 | + | |
| 15 | + | |
| 16 | + | |
| 17 | + | |
| 18 | + | |
| 19 | + | |
| 20 | + | |
13 | 21 | | |
14 | 22 | | |
15 | 23 | | |
16 | | - | |
| 24 | + | |
| 25 | + | |
| 26 | + | |
| 27 | + | |
| 28 | + | |
| 29 | + | |
| 30 | + | |
| 31 | + | |
| 32 | + | |
17 | 33 | | |
18 | 34 | | |
19 | 35 | | |
20 | | - | |
| 36 | + | |
21 | 37 | | |
22 | 38 | | |
23 | 39 | | |
24 | | - | |
| 40 | + | |
25 | 41 | | |
26 | 42 | | |
27 | 43 | | |
28 | | - | |
| 44 | + | |
| 45 | + | |
| 46 | + | |
| 47 | + | |
| 48 | + | |
29 | 49 | | |
30 | 50 | | |
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
19 | 19 | | |
20 | 20 | | |
21 | 21 | | |
| 22 | + | |
| 23 | + | |
| 24 | + | |
22 | 25 | | |
23 | 26 | | |
24 | 27 | | |
25 | 28 | | |
26 | 29 | | |
| 30 | + | |
27 | 31 | | |
28 | 32 | | |
| 33 | + | |
| 34 | + | |
| 35 | + | |
| 36 | + | |
29 | 37 | | |
30 | 38 | | |
31 | 39 | | |
| |||
74 | 82 | | |
75 | 83 | | |
76 | 84 | | |
| 85 | + | |
| 86 | + | |
| 87 | + | |
| 88 | + | |
| 89 | + | |
| 90 | + | |
| 91 | + | |
| 92 | + | |
| 93 | + | |
| 94 | + | |
| 95 | + | |
| 96 | + | |
| 97 | + | |
| 98 | + | |
| 99 | + | |
| 100 | + | |
| 101 | + | |
| 102 | + | |
| 103 | + | |
| 104 | + | |
| 105 | + | |
| 106 | + | |
| 107 | + | |
| 108 | + | |
| 109 | + | |
| 110 | + | |
| 111 | + | |
| 112 | + | |
| 113 | + | |
| 114 | + | |
| 115 | + | |
| 116 | + | |
| 117 | + | |
| 118 | + | |
| 119 | + | |
| 120 | + | |
| 121 | + | |
| 122 | + | |
| 123 | + | |
| 124 | + | |
| 125 | + | |
| 126 | + | |
| 127 | + | |
| 128 | + | |
| 129 | + | |
| 130 | + | |
| 131 | + | |
| 132 | + | |
| 133 | + | |
| 134 | + | |
| 135 | + | |
| 136 | + | |
| 137 | + | |
| 138 | + | |
| 139 | + | |
| 140 | + | |
| 141 | + | |
| 142 | + | |
| 143 | + | |
| 144 | + | |
| 145 | + | |
| 146 | + | |
| 147 | + | |
| 148 | + | |
| 149 | + | |
| 150 | + | |
| 151 | + | |
| 152 | + | |
| 153 | + | |
| 154 | + | |
| 155 | + | |
| 156 | + | |
| 157 | + | |
| 158 | + | |
| 159 | + | |
| 160 | + | |
| 161 | + | |
| 162 | + | |
| 163 | + | |
| 164 | + | |
| 165 | + | |
| 166 | + | |
| 167 | + | |
| 168 | + | |
| 169 | + | |
| 170 | + | |
| 171 | + | |
| 172 | + | |
| 173 | + | |
| 174 | + | |
| 175 | + | |
| 176 | + | |
| 177 | + | |
| 178 | + | |
| 179 | + | |
| 180 | + | |
| 181 | + | |
| 182 | + | |
| 183 | + | |
| 184 | + | |
| 185 | + | |
| 186 | + | |
| 187 | + | |
| 188 | + | |
| 189 | + | |
| 190 | + | |
| 191 | + | |
| 192 | + | |
| 193 | + | |
| 194 | + | |
| 195 | + | |
| 196 | + | |
| 197 | + | |
| 198 | + | |
| 199 | + | |
| 200 | + | |
| 201 | + | |
| 202 | + | |
| 203 | + | |
| 204 | + | |
| 205 | + | |
| 206 | + | |
| 207 | + | |
| 208 | + | |
| 209 | + | |
| 210 | + | |
| 211 | + | |
| 212 | + | |
| 213 | + | |
| 214 | + | |
| 215 | + | |
| 216 | + | |
| 217 | + | |
| 218 | + | |
| 219 | + | |
| 220 | + | |
| 221 | + | |
| 222 | + | |
| 223 | + | |
| 224 | + | |
| 225 | + | |
| 226 | + | |
| 227 | + | |
| 228 | + | |
| 229 | + | |
| 230 | + | |
| 231 | + | |
| 232 | + | |
| 233 | + | |
| 234 | + | |
| 235 | + | |
| 236 | + | |
| 237 | + | |
77 | 238 | | |
78 | 239 | | |
79 | 240 | | |
| |||
111 | 272 | | |
112 | 273 | | |
113 | 274 | | |
| 275 | + | |
114 | 276 | | |
115 | 277 | | |
| 278 | + | |
116 | 279 | | |
117 | 280 | | |
118 | 281 | | |
119 | 282 | | |
120 | 283 | | |
121 | 284 | | |
| 285 | + | |
| 286 | + | |
| 287 | + | |
122 | 288 | | |
123 | 289 | | |
124 | 290 | | |
| |||
225 | 391 | | |
226 | 392 | | |
227 | 393 | | |
228 | | - | |
229 | | - | |
230 | | - | |
231 | | - | |
232 | | - | |
233 | | - | |
234 | | - | |
235 | | - | |
236 | | - | |
237 | | - | |
| 394 | + | |
238 | 395 | | |
239 | 396 | | |
240 | 397 | | |
241 | | - | |
242 | | - | |
| 398 | + | |
| 399 | + | |
243 | 400 | | |
244 | 401 | | |
245 | 402 | | |
246 | | - | |
| 403 | + | |
247 | 404 | | |
248 | 405 | | |
249 | 406 | | |
| |||
0 commit comments