Skip to content

Commit 0e378d2

Browse files
committed
Trigger unit tests for docker images upload workflow
1 parent 40b8f0c commit 0e378d2

File tree

5 files changed

+195
-25
lines changed

5 files changed

+195
-25
lines changed

.github/workflows/UploadDockerImages.yml

Lines changed: 131 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# This workflow builds and pushes MaxText images for both TPU and GPU devices.
1616
# It runs automatically daily at 12am UTC, on Pull Requests, or manually via Workflow Dispatch.
1717

18-
name: Build Images
18+
name: Build and Test Images
1919

2020
on:
2121
schedule:
@@ -128,3 +128,133 @@ jobs:
128128
dockerfile: ${{ matrix.dockerfile }}
129129
maxtext_sha: ${{ needs.setup.outputs.maxtext_sha }}
130130
image_date: ${{ needs.setup.outputs.image_date }}
131+
132+
# Calls the reusable test workflow once per pre-training TPU image listed in
# the matrix, selecting tests that are not marked integration_test.
pre-training-images-tpu-unit-tests:
  # `setup` supplies the image_date output used in the image tag below.
  needs:
    - setup
    - tpu-pre-training
  strategy:
    # A failure for one image does not cancel the run for the other.
    fail-fast: false
    matrix:
      image_name:
        - maxtext_jax_stable
        - maxtext_jax_nightly
  uses: ./.github/workflows/run_tests_against_package.yml
  with:
    # Image under test: <matrix image name>:<image_date from the setup job>.
    base_image: "${{ matrix.image_name }}:${{ needs.setup.outputs.image_date }}"
    device_type: 'tpu'
    device_name: 'v6e-4'
    cloud_runner: 'linux-x86-ct6e-180-4tpu'
    # Excludes CPU-only, GPU-only and integration-marked tests.
    pytest_marker: "not cpu_only and not gpu_only and not integration_test"
    xla_python_client_mem_fraction: 0.75
    tf_force_gpu_allow_growth: false
    container_resource_option: '--privileged'
    # True only when the workflow was started by the cron schedule.
    is_scheduled_run: "${{ github.event_name == 'schedule' }}"
    # NOTE(review): presumably tells the called workflow that MaxText is
    # already installed in the image - confirm in run_tests_against_package.yml.
    maxtext_installed: true
150+
151+
# Calls the reusable test workflow once per pre-training TPU image listed in
# the matrix, selecting only tests marked integration_test.
pre-training-images-tpu-integration-tests:
  # `setup` supplies the image_date output used in the image tag below.
  needs:
    - setup
    - tpu-pre-training
  strategy:
    # A failure for one image does not cancel the run for the other.
    fail-fast: false
    matrix:
      image_name:
        - maxtext_jax_stable
        - maxtext_jax_nightly
  uses: ./.github/workflows/run_tests_against_package.yml
  with:
    # Image under test: <matrix image name>:<image_date from the setup job>.
    base_image: "${{ matrix.image_name }}:${{ needs.setup.outputs.image_date }}"
    device_type: 'tpu'
    device_name: 'v6e-4'
    cloud_runner: 'linux-x86-ct6e-180-4tpu'
    # Integration-marked tests, excluding CPU-only and GPU-only ones.
    pytest_marker: "not cpu_only and not gpu_only and integration_test"
    xla_python_client_mem_fraction: 0.75
    tf_force_gpu_allow_growth: false
    container_resource_option: '--privileged'
    # True only when the workflow was started by the cron schedule.
    is_scheduled_run: "${{ github.event_name == 'schedule' }}"
    # NOTE(review): presumably tells the called workflow that MaxText is
    # already installed in the image - confirm in run_tests_against_package.yml.
    maxtext_installed: true
169+
170+
# Calls the reusable test workflow once per post-training TPU image listed in
# the matrix, selecting tests that are not marked integration_test.
post-training-images-tpu-unit-tests:
  # `setup` supplies the image_date output used in the image tag below.
  needs:
    - setup
    - tpu-post-training
  strategy:
    # A failure for one image does not cancel the run for the other.
    fail-fast: false
    matrix:
      image_name:
        - maxtext_post_training_stable
        - maxtext_post_training_nightly
  uses: ./.github/workflows/run_tests_against_package.yml
  with:
    # Image under test: <matrix image name>:<image_date from the setup job>.
    base_image: "${{ matrix.image_name }}:${{ needs.setup.outputs.image_date }}"
    device_type: 'tpu'
    device_name: 'v6e-4'
    cloud_runner: 'linux-x86-ct6e-180-4tpu'
    # Excludes CPU-only, GPU-only and integration-marked tests.
    pytest_marker: "not cpu_only and not gpu_only and not integration_test"
    xla_python_client_mem_fraction: 0.75
    tf_force_gpu_allow_growth: false
    container_resource_option: '--privileged'
    # True only when the workflow was started by the cron schedule.
    is_scheduled_run: "${{ github.event_name == 'schedule' }}"
    # NOTE(review): presumably tells the called workflow that MaxText is
    # already installed in the image - confirm in run_tests_against_package.yml.
    maxtext_installed: true
188+
189+
# Calls the reusable test workflow once per post-training TPU image listed in
# the matrix, selecting only tests marked integration_test.
post-training-images-tpu-integration-tests:
  # `setup` supplies the image_date output used in the image tag below.
  needs:
    - setup
    - tpu-post-training
  strategy:
    # A failure for one image does not cancel the run for the other.
    fail-fast: false
    matrix:
      image_name:
        - maxtext_post_training_stable
        - maxtext_post_training_nightly
  uses: ./.github/workflows/run_tests_against_package.yml
  with:
    # Image under test: <matrix image name>:<image_date from the setup job>.
    base_image: "${{ matrix.image_name }}:${{ needs.setup.outputs.image_date }}"
    device_type: 'tpu'
    device_name: 'v6e-4'
    cloud_runner: 'linux-x86-ct6e-180-4tpu'
    # Integration-marked tests, excluding CPU-only and GPU-only ones.
    pytest_marker: "not cpu_only and not gpu_only and integration_test"
    xla_python_client_mem_fraction: 0.75
    tf_force_gpu_allow_growth: false
    container_resource_option: '--privileged'
    # True only when the workflow was started by the cron schedule.
    is_scheduled_run: "${{ github.event_name == 'schedule' }}"
    # NOTE(review): presumably tells the called workflow that MaxText is
    # already installed in the image - confirm in run_tests_against_package.yml.
    maxtext_installed: true
207+
208+
# Calls the reusable test workflow once per pre-training GPU image listed in
# the matrix, selecting tests that are not marked integration_test.
pre-training-images-gpu-unit-tests:
  # `setup` supplies the image_date output used in the image tag below.
  needs: [setup, gpu-pre-training]
  uses: ./.github/workflows/run_tests_against_package.yml
  strategy:
    # A failure for one image does not cancel the run for the other.
    fail-fast: false
    matrix:
      image_name: [maxtext_gpu_jax_stable, maxtext_gpu_jax_nightly]
      # `device_type` below reads matrix.cuda, so the axis has to be declared
      # here; an undeclared matrix key evaluates to an empty string.
      # NOTE(review): cuda12 is assumed to be the value the called workflow
      # expects for GPU runs - confirm against the other GPU test jobs.
      cuda: [cuda12]
  with:
    device_type: ${{ matrix.cuda }}
    device_name: a100-40gb-4
    # Image under test: <matrix image name>:<image_date from the setup job>.
    base_image: ${{ matrix.image_name }}:${{ needs.setup.outputs.image_date }}
    cloud_runner: linux-x86-a2-48-a100-4gpu
    # Excludes CPU-only, TPU-only and integration-marked tests.
    pytest_marker: 'not cpu_only and not tpu_only and not integration_test'
    xla_python_client_mem_fraction: 0.65
    tf_force_gpu_allow_growth: true
    container_resource_option: "--shm-size 2g --runtime=nvidia --gpus all --privileged"
    # True only when the workflow was started by the cron schedule.
    is_scheduled_run: ${{ github.event_name == 'schedule' }}
    # NOTE(review): presumably tells the called workflow that MaxText is
    # already installed in the image - confirm in run_tests_against_package.yml.
    maxtext_installed: true
226+
227+
# Calls the reusable test workflow once per pre-training GPU image listed in
# the matrix, selecting only tests marked integration_test.
pre-training-images-gpu-integration-tests:
  # `setup` supplies the image_date output used in the image tag below.
  needs: [setup, gpu-pre-training]
  uses: ./.github/workflows/run_tests_against_package.yml
  strategy:
    # A failure for one image does not cancel the run for the other.
    fail-fast: false
    matrix:
      image_name: [maxtext_gpu_jax_stable, maxtext_gpu_jax_nightly]
      # `device_type` below reads matrix.cuda, so the axis has to be declared
      # here; an undeclared matrix key evaluates to an empty string.
      # NOTE(review): cuda12 is assumed to be the value the called workflow
      # expects for GPU runs - confirm against the other GPU test jobs.
      cuda: [cuda12]
  with:
    device_type: ${{ matrix.cuda }}
    device_name: a100-40gb-4
    # Image under test: <matrix image name>:<image_date from the setup job>.
    base_image: ${{ matrix.image_name }}:${{ needs.setup.outputs.image_date }}
    cloud_runner: linux-x86-a2-48-a100-4gpu
    # Integration-marked tests, excluding CPU-only and TPU-only ones.
    pytest_marker: 'not cpu_only and not tpu_only and integration_test'
    xla_python_client_mem_fraction: 0.65
    tf_force_gpu_allow_growth: true
    container_resource_option: "--shm-size 2g --runtime=nvidia --gpus all --privileged"
    # True only when the workflow was started by the cron schedule.
    is_scheduled_run: ${{ github.event_name == 'schedule' }}
    # NOTE(review): presumably tells the called workflow that MaxText is
    # already installed in the image - confirm in run_tests_against_package.yml.
    maxtext_installed: true
245+
246+
# Calls the reusable notebook workflow once per post-training TPU image listed
# in the matrix.
post-training-images-tpu-notebook-tests:
  # `setup` supplies the image_date output used in the image tag below.
  needs:
    - setup
    - tpu-post-training
  strategy:
    # A failure for one image does not cancel the run for the other.
    fail-fast: false
    matrix:
      image_name:
        - maxtext_post_training_stable
        - maxtext_post_training_nightly
  uses: ./.github/workflows/run_jupyter_notebooks.yml
  with:
    # Image under test: <matrix image name>:<image_date from the setup job>.
    base_image: "${{ matrix.image_name }}:${{ needs.setup.outputs.image_date }}"
    device_type: 'tpu'
    device_name: 'v6e-4'
    cloud_runner: 'linux-x86-ct6e-180-4tpu'
    # Makes the called workflow skip source checkout and wheel installation.
    maxtext_installed: true
  secrets:
    # Forwarded because the called workflow declares HF_TOKEN as required.
    HF_TOKEN: "${{ secrets.HF_TOKEN }}"

.github/workflows/build_and_test_maxtext.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ jobs:
113113
with:
114114
device_type: tpu
115115
device_name: v6e-4
116-
image_type: ${{ matrix.image_type }}
116+
base_image: maxtext-unit-test-tpu:${{ matrix.image_type }}
117117
cloud_runner: linux-x86-ct6e-180-4tpu
118118
secrets:
119119
HF_TOKEN: ${{ secrets.HF_TOKEN }}
@@ -131,7 +131,7 @@ jobs:
131131
device_type: cpu
132132
device_name: X64
133133
cloud_runner: linux-x86-n2-16
134-
image_type: ${{ matrix.image_type }}
134+
base_image: maxtext-unit-test-tpu:${{ matrix.image_type }}
135135
pytest_marker: 'cpu_only'
136136
xla_python_client_mem_fraction: 0.75
137137
tf_force_gpu_allow_growth: false
@@ -151,7 +151,7 @@ jobs:
151151
with:
152152
device_type: tpu
153153
device_name: v6e-4
154-
image_type: ${{ matrix.image_type }}
154+
base_image: maxtext-unit-test-tpu:${{ matrix.image_type }}
155155
cloud_runner: linux-x86-ct6e-180-4tpu
156156
pytest_marker: 'not cpu_only and not gpu_only and not integration_test'
157157
xla_python_client_mem_fraction: 0.75
@@ -170,7 +170,7 @@ jobs:
170170
with:
171171
device_type: tpu
172172
device_name: v6e-4
173-
image_type: ${{ matrix.image_type }}
173+
base_image: maxtext-unit-test-tpu:${{ matrix.image_type }}
174174
cloud_runner: linux-x86-ct6e-180-4tpu
175175
pytest_marker: 'not cpu_only and not gpu_only and integration_test'
176176
xla_python_client_mem_fraction: 0.75
@@ -189,7 +189,7 @@ jobs:
189189
with:
190190
device_type: tpu
191191
device_name: v6e-4
192-
image_type: ${{ matrix.image_type }}
192+
base_image: maxtext-unit-test-tpu:${{ matrix.image_type }}
193193
cloud_runner: linux-x86-ct6e-180-4tpu
194194
pytest_marker: 'not cpu_only and not gpu_only and not integration_test'
195195
xla_python_client_mem_fraction: 0.75
@@ -208,7 +208,7 @@ jobs:
208208
with:
209209
device_type: tpu
210210
device_name: v6e-4
211-
image_type: ${{ matrix.image_type }}
211+
base_image: maxtext-unit-test-tpu:${{ matrix.image_type }}
212212
cloud_runner: linux-x86-ct6e-180-4tpu
213213
pytest_marker: 'not cpu_only and not gpu_only and integration_test'
214214
xla_python_client_mem_fraction: 0.75
@@ -228,7 +228,7 @@ jobs:
228228
with:
229229
device_type: ${{ matrix.cuda }}
230230
device_name: a100-40gb-4
231-
image_type: ${{ matrix.image_type }}
231+
base_image: maxtext-unit-test-${{ matrix.cuda }}:${{ matrix.image_type }}
232232
cloud_runner: linux-x86-a2-48-a100-4gpu
233233
pytest_marker: 'not cpu_only and not tpu_only and not integration_test'
234234
xla_python_client_mem_fraction: 0.65
@@ -248,7 +248,7 @@ jobs:
248248
with:
249249
device_type: ${{ matrix.cuda }}
250250
device_name: a100-40gb-4
251-
image_type: ${{ matrix.image_type }}
251+
base_image: maxtext-unit-test-${{ matrix.cuda }}:${{ matrix.image_type }}
252252
cloud_runner: linux-x86-a2-48-a100-4gpu
253253
pytest_marker: 'not cpu_only and not tpu_only and integration_test'
254254
xla_python_client_mem_fraction: 0.65

.github/workflows/run_jupyter_notebooks.yml

Lines changed: 26 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,12 +25,17 @@ on:
2525
device_name:
2626
required: true
2727
type: string
28-
image_type:
28+
base_image:
2929
required: false
3030
type: string
3131
cloud_runner:
3232
required: false
3333
type: string
34+
# Flag to skip source checkout and wheel installation
35+
maxtext_installed:
36+
required: false
37+
type: boolean
38+
default: false
3439
secrets:
3540
HF_TOKEN:
3641
required: true
@@ -41,14 +46,17 @@ jobs:
4146
run:
4247
runs-on: ${{ inputs.cloud_runner != '' && inputs.cloud_runner || fromJson(format('["self-hosted", "{0}", "{1}"]', inputs.device_type, inputs.device_name)) }}
4348
container:
44-
image: gcr.io/tpu-prod-env-multipod/maxtext-unit-test-${{ inputs.device_type == 'cpu' && 'tpu' || inputs.device_type }}:${{ inputs.image_type != '' && inputs.image_type }}
49+
image: gcr.io/tpu-prod-env-multipod/${{ inputs.base_image }}
4550
steps:
4651
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
52+
if: ${{ !inputs.maxtext_installed }}
4753
- name: Download the MaxText wheel
54+
if: ${{ !inputs.maxtext_installed }}
4855
uses: actions/download-artifact@634f93cb2916e3fdff6788551b99b062d0335ce0
4956
with:
5057
name: maxtext-wheel
5158
- name: Install MaxText and Dependencies
59+
if: ${{ !inputs.maxtext_installed }}
5260
shell: bash
5361
run: |
5462
python3 -m uv venv --seed
@@ -59,10 +67,6 @@ jobs:
5967
uv pip install ${maxtext_wheel}[${MAXTEXT_PACKAGE_EXTRA}] --resolution=lowest
6068
uv pip install -r src/install_maxtext_extra_deps/extra_deps_from_github.txt
6169
62-
# Install dependencies for running notebooks
63-
uv pip install papermill ipykernel ipywidgets
64-
.venv/bin/python3 -m ipykernel install --user --name maxtext_venv
65-
6670
# Install Tunix for post-training notebooks
6771
uv pip install git+https://github.com/google/tunix
6872
@@ -82,9 +86,24 @@ jobs:
8286
env:
8387
HF_TOKEN: ${{ secrets.HF_TOKEN }}
8488
run: |
89+
if [ "${{ inputs.maxtext_installed }}" == "true" ]; then
90+
# Move to the directory where code is baked into the image. See the Dockerfile.
91+
# This is necessary because GHA sets an empty workspace by default.
92+
cd /deps
93+
PYTHON_EXE="python3"
94+
PAPERMILL_EXE="papermill"
95+
else
96+
PYTHON_EXE=".venv/bin/python3"
97+
PAPERMILL_EXE=".venv/bin/papermill"
98+
fi
99+
85100
MAXTEXT_REPO_ROOT=$(pwd)
86101
MAXTEXT_NOTEBOOKS_ROOT="$MAXTEXT_REPO_ROOT/src/MaxText/examples"
87102
103+
# Install dependencies for running notebooks
104+
$PYTHON_EXE -m pip install papermill ipykernel ipywidgets
105+
$PYTHON_EXE -m ipykernel install --user --name maxtext_venv
106+
88107
for notebook in "$MAXTEXT_NOTEBOOKS_ROOT"/{sft,rl}*.ipynb; do
89108
filename=$(basename "$notebook")
90109
output_name="${filename%.ipynb}_output.ipynb"
@@ -93,7 +112,7 @@ jobs:
93112
echo "Running $filename ..."
94113
echo "------------------------------------------------------"
95114
96-
.venv/bin/papermill "$notebook" "$output_name" -k maxtext_venv
115+
$PAPERMILL_EXE "$notebook" "$output_name" -k maxtext_venv
97116
done
98117
- name: Upload Outputs
99118
if: always()

.github/workflows/run_pathways_tests.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ on:
2525
device_name:
2626
required: true
2727
type: string
28-
image_type:
29-
required: false
28+
base_image:
29+
required: true
3030
type: string
3131
pytest_marker:
3232
required: true
@@ -58,7 +58,7 @@ jobs:
5858
run:
5959
runs-on: ${{ inputs.cloud_runner != '' && inputs.cloud_runner || fromJson(format('["self-hosted", "{0}", "{1}"]', inputs.device_type, inputs.device_name)) }}
6060
container:
61-
image: gcr.io/tpu-prod-env-multipod/maxtext-unit-test-tpu:${{ inputs.image_type != '' && inputs.image_type }}
61+
image: gcr.io/tpu-prod-env-multipod/${{ inputs.base_image }}
6262
env:
6363
XLA_PYTHON_CLIENT_MEM_FRACTION: ${{ inputs.xla_python_client_mem_fraction }}
6464
TF_FORCE_GPU_ALLOW_GROWTH: ${{ inputs.tf_force_gpu_allow_growth }}

0 commit comments

Comments
 (0)