Commit cbc4557
committed
Add Warmup-Stable-Decay (WSD) learning rate scheduler with configurable stable and decay phases
Signed-off-by: bzantium <[email protected]>1 parent 08216c6 commit cbc4557
File tree
4 files changed
+194
-23
lines changed- src/MaxText
- configs
- tests
4 files changed
+194
-23
lines changed| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
607 | 607 | | |
608 | 608 | | |
609 | 609 | | |
610 | | - | |
| 610 | + | |
611 | 611 | | |
612 | 612 | | |
613 | 613 | | |
| |||
635 | 635 | | |
636 | 636 | | |
637 | 637 | | |
638 | | - | |
639 | | - | |
| 638 | + | |
| 639 | + | |
| 640 | + | |
| 641 | + | |
| 642 | + | |
| 643 | + | |
| 644 | + | |
| 645 | + | |
| 646 | + | |
640 | 647 | | |
641 | | - | |
642 | | - | |
| 648 | + | |
| 649 | + | |
| 650 | + | |
| 651 | + | |
| 652 | + | |
643 | 653 | | |
644 | 654 | | |
645 | | - | |
646 | | - | |
| 655 | + | |
| 656 | + | |
| 657 | + | |
| 658 | + | |
| 659 | + | |
| 660 | + | |
647 | 661 | | |
648 | 662 | | |
649 | 663 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
124 | 124 | | |
125 | 125 | | |
126 | 126 | | |
| 127 | + | |
| 128 | + | |
| 129 | + | |
| 130 | + | |
| 131 | + | |
| 132 | + | |
| 133 | + | |
| 134 | + | |
| 135 | + | |
| 136 | + | |
| 137 | + | |
| 138 | + | |
| 139 | + | |
| 140 | + | |
127 | 141 | | |
128 | 142 | | |
129 | 143 | | |
| |||
1005 | 1019 | | |
1006 | 1020 | | |
1007 | 1021 | | |
| 1022 | + | |
| 1023 | + | |
| 1024 | + | |
1008 | 1025 | | |
1009 | 1026 | | |
1010 | 1027 | | |
| 1028 | + | |
| 1029 | + | |
| 1030 | + | |
| 1031 | + | |
| 1032 | + | |
| 1033 | + | |
| 1034 | + | |
| 1035 | + | |
| 1036 | + | |
1011 | 1037 | | |
1012 | 1038 | | |
1013 | 1039 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
40 | 40 | | |
41 | 41 | | |
42 | 42 | | |
| 43 | + | |
43 | 44 | | |
44 | 45 | | |
45 | 46 | | |
| |||
1103 | 1104 | | |
1104 | 1105 | | |
1105 | 1106 | | |
1106 | | - | |
1107 | | - | |
1108 | | - | |
| 1107 | + | |
| 1108 | + | |
| 1109 | + | |
| 1110 | + | |
| 1111 | + | |
| 1112 | + | |
| 1113 | + | |
1109 | 1114 | | |
1110 | | - | |
| 1115 | + | |
| 1116 | + | |
| 1117 | + | |
| 1118 | + | |
1111 | 1119 | | |
1112 | 1120 | | |
1113 | 1121 | | |
1114 | 1122 | | |
1115 | 1123 | | |
| 1124 | + | |
| 1125 | + | |
1116 | 1126 | | |
1117 | 1127 | | |
1118 | 1128 | | |
| |||
1122 | 1132 | | |
1123 | 1133 | | |
1124 | 1134 | | |
1125 | | - | |
1126 | | - | |
1127 | 1135 | | |
1128 | | - | |
1129 | 1136 | | |
1130 | | - | |
1131 | 1137 | | |
1132 | | - | |
1133 | | - | |
1134 | 1138 | | |
1135 | | - | |
1136 | | - | |
1137 | | - | |
1138 | | - | |
1139 | | - | |
| 1139 | + | |
| 1140 | + | |
| 1141 | + | |
| 1142 | + | |
| 1143 | + | |
| 1144 | + | |
| 1145 | + | |
| 1146 | + | |
| 1147 | + | |
| 1148 | + | |
| 1149 | + | |
| 1150 | + | |
| 1151 | + | |
| 1152 | + | |
| 1153 | + | |
| 1154 | + | |
| 1155 | + | |
| 1156 | + | |
| 1157 | + | |
| 1158 | + | |
| 1159 | + | |
| 1160 | + | |
| 1161 | + | |
| 1162 | + | |
| 1163 | + | |
| 1164 | + | |
| 1165 | + | |
| 1166 | + | |
| 1167 | + | |
| 1168 | + | |
| 1169 | + | |
| 1170 | + | |
| 1171 | + | |
| 1172 | + | |
| 1173 | + | |
1140 | 1174 | | |
1141 | 1175 | | |
| 1176 | + | |
1142 | 1177 | | |
1143 | | - | |
| 1178 | + | |
1144 | 1179 | | |
1145 | 1180 | | |
1146 | 1181 | | |
| |||
| Original file line number | Diff line number | Diff line change | |
|---|---|---|---|
| |||
682 | 682 | | |
683 | 683 | | |
684 | 684 | | |
| 685 | + | |
| 686 | + | |
| 687 | + | |
| 688 | + | |
| 689 | + | |
| 690 | + | |
| 691 | + | |
| 692 | + | |
| 693 | + | |
| 694 | + | |
| 695 | + | |
| 696 | + | |
| 697 | + | |
| 698 | + | |
| 699 | + | |
| 700 | + | |
| 701 | + | |
| 702 | + | |
| 703 | + | |
| 704 | + | |
| 705 | + | |
| 706 | + | |
| 707 | + | |
| 708 | + | |
| 709 | + | |
| 710 | + | |
| 711 | + | |
| 712 | + | |
| 713 | + | |
| 714 | + | |
| 715 | + | |
| 716 | + | |
| 717 | + | |
| 718 | + | |
| 719 | + | |
| 720 | + | |
| 721 | + | |
| 722 | + | |
| 723 | + | |
| 724 | + | |
| 725 | + | |
| 726 | + | |
| 727 | + | |
| 728 | + | |
| 729 | + | |
| 730 | + | |
| 731 | + | |
| 732 | + | |
| 733 | + | |
| 734 | + | |
| 735 | + | |
| 736 | + | |
| 737 | + | |
| 738 | + | |
| 739 | + | |
| 740 | + | |
| 741 | + | |
| 742 | + | |
| 743 | + | |
| 744 | + | |
| 745 | + | |
| 746 | + | |
| 747 | + | |
| 748 | + | |
| 749 | + | |
| 750 | + | |
| 751 | + | |
| 752 | + | |
| 753 | + | |
| 754 | + | |
| 755 | + | |
| 756 | + | |
| 757 | + | |
| 758 | + | |
| 759 | + | |
| 760 | + | |
| 761 | + | |
| 762 | + | |
| 763 | + | |
| 764 | + | |
| 765 | + | |
| 766 | + | |
| 767 | + | |
| 768 | + | |
| 769 | + | |
| 770 | + | |
| 771 | + | |
| 772 | + | |
| 773 | + | |
| 774 | + | |
| 775 | + | |
| 776 | + | |
| 777 | + | |
| 778 | + | |
| 779 | + | |
| 780 | + | |
685 | 781 | | |
686 | 782 | | |
0 commit comments