# This workflow builds, pushes, and tests MaxText images for both TPU and GPU devices.
# It runs automatically every day at 12 AM UTC, on pull requests, or manually via workflow dispatch.
1717
18- name : Build Images
18+ name : Build and Test Images
1919
2020on :
2121 schedule :
@@ -128,3 +128,133 @@ jobs:
128128 dockerfile : ${{ matrix.dockerfile }}
129129 maxtext_sha : ${{ needs.setup.outputs.maxtext_sha }}
130130 image_date : ${{ needs.setup.outputs.image_date }}
131+
# Unit tests for the TPU pre-training images.
# Calls the reusable run_tests_against_package.yml workflow once per image in
# the matrix, after the setup and tpu-pre-training jobs have finished.
pre-training-images-tpu-unit-tests:
  uses: ./.github/workflows/run_tests_against_package.yml
  needs:
    - setup
    - tpu-pre-training
  strategy:
    # Let the remaining matrix entry keep running if one image fails.
    fail-fast: false
    matrix:
      image_name:
        - maxtext_jax_stable
        - maxtext_jax_nightly
  with:
    # Image under test: <matrix image name>:<image_date output of the setup job>.
    base_image: "${{ matrix.image_name }}:${{ needs.setup.outputs.image_date }}"
    device_type: tpu
    device_name: v6e-4
    cloud_runner: linux-x86-ct6e-180-4tpu
    # Marker expression excluding CPU-only, GPU-only and integration tests
    # (presumably forwarded to `pytest -m` by the called workflow; verify there).
    pytest_marker: " not cpu_only and not gpu_only and not integration_test"
    xla_python_client_mem_fraction: 0.75
    tf_force_gpu_allow_growth: false
    container_resource_option: ' --privileged'
    # True only when the run was triggered by the cron schedule.
    is_scheduled_run: ${{ github.event_name == 'schedule' }}
    maxtext_installed: true
150+
# Integration tests for the TPU pre-training images.
# Calls the reusable run_tests_against_package.yml workflow once per image in
# the matrix, after the setup and tpu-pre-training jobs have finished.
pre-training-images-tpu-integration-tests:
  uses: ./.github/workflows/run_tests_against_package.yml
  needs:
    - setup
    - tpu-pre-training
  strategy:
    # Let the remaining matrix entry keep running if one image fails.
    fail-fast: false
    matrix:
      image_name:
        - maxtext_jax_stable
        - maxtext_jax_nightly
  with:
    # Image under test: <matrix image name>:<image_date output of the setup job>.
    base_image: "${{ matrix.image_name }}:${{ needs.setup.outputs.image_date }}"
    device_type: tpu
    device_name: v6e-4
    cloud_runner: linux-x86-ct6e-180-4tpu
    # Marker expression selecting integration tests that are neither CPU-only
    # nor GPU-only (presumably forwarded to `pytest -m`; verify in the callee).
    pytest_marker: " not cpu_only and not gpu_only and integration_test"
    xla_python_client_mem_fraction: 0.75
    tf_force_gpu_allow_growth: false
    container_resource_option: ' --privileged'
    # True only when the run was triggered by the cron schedule.
    is_scheduled_run: ${{ github.event_name == 'schedule' }}
    maxtext_installed: true
169+
# Unit tests for the TPU post-training images.
# Calls the reusable run_tests_against_package.yml workflow once per image in
# the matrix, after the setup and tpu-post-training jobs have finished.
post-training-images-tpu-unit-tests:
  uses: ./.github/workflows/run_tests_against_package.yml
  needs:
    - setup
    - tpu-post-training
  strategy:
    # Let the remaining matrix entry keep running if one image fails.
    fail-fast: false
    matrix:
      image_name:
        - maxtext_post_training_stable
        - maxtext_post_training_nightly
  with:
    # Image under test: <matrix image name>:<image_date output of the setup job>.
    base_image: "${{ matrix.image_name }}:${{ needs.setup.outputs.image_date }}"
    device_type: tpu
    device_name: v6e-4
    cloud_runner: linux-x86-ct6e-180-4tpu
    # Marker expression excluding CPU-only, GPU-only and integration tests
    # (presumably forwarded to `pytest -m` by the called workflow; verify there).
    pytest_marker: " not cpu_only and not gpu_only and not integration_test"
    xla_python_client_mem_fraction: 0.75
    tf_force_gpu_allow_growth: false
    container_resource_option: ' --privileged'
    # True only when the run was triggered by the cron schedule.
    is_scheduled_run: ${{ github.event_name == 'schedule' }}
    maxtext_installed: true
188+
# Integration tests for the TPU post-training images.
# Calls the reusable run_tests_against_package.yml workflow once per image in
# the matrix, after the setup and tpu-post-training jobs have finished.
post-training-images-tpu-integration-tests:
  uses: ./.github/workflows/run_tests_against_package.yml
  needs:
    - setup
    - tpu-post-training
  strategy:
    # Let the remaining matrix entry keep running if one image fails.
    fail-fast: false
    matrix:
      image_name:
        - maxtext_post_training_stable
        - maxtext_post_training_nightly
  with:
    # Image under test: <matrix image name>:<image_date output of the setup job>.
    base_image: "${{ matrix.image_name }}:${{ needs.setup.outputs.image_date }}"
    device_type: tpu
    device_name: v6e-4
    cloud_runner: linux-x86-ct6e-180-4tpu
    # Marker expression selecting integration tests that are neither CPU-only
    # nor GPU-only (presumably forwarded to `pytest -m`; verify in the callee).
    pytest_marker: " not cpu_only and not gpu_only and integration_test"
    xla_python_client_mem_fraction: 0.75
    tf_force_gpu_allow_growth: false
    container_resource_option: ' --privileged'
    # True only when the run was triggered by the cron schedule.
    is_scheduled_run: ${{ github.event_name == 'schedule' }}
    maxtext_installed: true
207+
# Unit tests for the GPU pre-training images.
# Calls the reusable run_tests_against_package.yml workflow once per image in
# the matrix, after the setup and gpu-pre-training jobs have finished.
pre-training-images-gpu-unit-tests:
  needs: [setup, gpu-pre-training]
  uses: ./.github/workflows/run_tests_against_package.yml
  strategy:
    # Let the remaining matrix entry keep running if one image fails.
    fail-fast: false
    matrix:
      image_name: [maxtext_gpu_jax_stable, maxtext_gpu_jax_nightly]
      # `device_type` below reads matrix.cuda, so the dimension must exist:
      # an undefined matrix property evaluates to an empty string and would
      # hand the called workflow a blank device_type.
      # NOTE(review): `cuda12` is assumed to be the GPU device_type accepted by
      # run_tests_against_package.yml - confirm against that workflow.
      cuda: [cuda12]
  with:
    device_type: ${{ matrix.cuda }}
    device_name: a100-40gb-4
    # Image under test: <matrix image name>:<image_date output of the setup job>.
    base_image: ${{ matrix.image_name }}:${{ needs.setup.outputs.image_date }}
    cloud_runner: linux-x86-a2-48-a100-4gpu
    # Marker expression excluding CPU-only, TPU-only and integration tests
    # (presumably forwarded to `pytest -m` by the called workflow; verify there).
    pytest_marker: ' not cpu_only and not tpu_only and not integration_test'
    xla_python_client_mem_fraction: 0.65
    tf_force_gpu_allow_growth: true
    container_resource_option: " --shm-size 2g --runtime=nvidia --gpus all --privileged"
    # True only when the run was triggered by the cron schedule.
    is_scheduled_run: ${{ github.event_name == 'schedule' }}
    maxtext_installed: true
226+
# Integration tests for the GPU pre-training images.
# Calls the reusable run_tests_against_package.yml workflow once per image in
# the matrix, after the setup and gpu-pre-training jobs have finished.
pre-training-images-gpu-integration-tests:
  needs: [setup, gpu-pre-training]
  uses: ./.github/workflows/run_tests_against_package.yml
  strategy:
    # Let the remaining matrix entry keep running if one image fails.
    fail-fast: false
    matrix:
      image_name: [maxtext_gpu_jax_stable, maxtext_gpu_jax_nightly]
      # `device_type` below reads matrix.cuda, so the dimension must exist:
      # an undefined matrix property evaluates to an empty string and would
      # hand the called workflow a blank device_type.
      # NOTE(review): `cuda12` is assumed to be the GPU device_type accepted by
      # run_tests_against_package.yml - confirm against that workflow.
      cuda: [cuda12]
  with:
    device_type: ${{ matrix.cuda }}
    device_name: a100-40gb-4
    # Image under test: <matrix image name>:<image_date output of the setup job>.
    base_image: ${{ matrix.image_name }}:${{ needs.setup.outputs.image_date }}
    cloud_runner: linux-x86-a2-48-a100-4gpu
    # Marker expression selecting integration tests that are neither CPU-only
    # nor TPU-only (presumably forwarded to `pytest -m`; verify in the callee).
    pytest_marker: ' not cpu_only and not tpu_only and integration_test'
    xla_python_client_mem_fraction: 0.65
    tf_force_gpu_allow_growth: true
    container_resource_option: " --shm-size 2g --runtime=nvidia --gpus all --privileged"
    # True only when the run was triggered by the cron schedule.
    is_scheduled_run: ${{ github.event_name == 'schedule' }}
    maxtext_installed: true
245+
# Notebook tests for the TPU post-training images.
# Calls the reusable run_jupyter_notebooks.yml workflow once per image in the
# matrix, after the setup and tpu-post-training jobs have finished.
post-training-images-tpu-notebook-tests:
  uses: ./.github/workflows/run_jupyter_notebooks.yml
  needs:
    - setup
    - tpu-post-training
  strategy:
    # Let the remaining matrix entry keep running if one image fails.
    fail-fast: false
    matrix:
      image_name:
        - maxtext_post_training_stable
        - maxtext_post_training_nightly
  secrets:
    # Passes this repository's HF_TOKEN secret through to the called workflow.
    HF_TOKEN: ${{ secrets.HF_TOKEN }}
  with:
    # Image under test: <matrix image name>:<image_date output of the setup job>.
    base_image: "${{ matrix.image_name }}:${{ needs.setup.outputs.image_date }}"
    device_type: tpu
    device_name: v6e-4
    cloud_runner: linux-x86-ct6e-180-4tpu
    maxtext_installed: true
0 commit comments