# .bazelrc for pytorch/xla.
############################################################################
# All default build options below.

# Make Bazel print out all options from rc files.
build --announce_rc

# TODO(goranpetrovic): figure out visibility of tensorflow libraries.
# Until that is resolved, visibility checking is turned off for every build.
build --nocheck_visibility

# Let Bazel apply the OS-specific config sections of this file (e.g. the
# `build:linux` lines) automatically, without an explicit `--config=...`.
build --enable_platform_specific_config

# Opt in to the experimental `cc_shared_library` support.
build --experimental_cc_shared_library

# Disable enabled-by-default TensorFlow features that we don't care about.
# One define per feature.
build --define=no_aws_support=true
build --define=no_hdfs_support=true
build --define=no_kafka_support=true
build --define=no_ignite_support=true

build --define=grpc_no_ares=true

# Optimized compilation mode is the default.
build --compilation_mode=opt

build --config=short_logs

# Exceptions are how PyTorch/XLA communicates with Python, so keep them on.
build --copt=-fexceptions

# GCC is forced as the compiler because clang/bazel has issues.
build --action_env=CC=gcc
build --action_env=CXX=g++
build --spawn_strategy=standalone

# bzlmod stays disabled for now.
# TODO(https://github.com/pytorch/xla/issues/9336): Enable bzlmod.
build --enable_bzlmod=false

###########################################################################

# Opt-in config (`--config=clang`): build with clang 17 instead of the GCC
# default, using sandboxed spawns.
build:clang --action_env=CC=/usr/bin/clang-17
build:clang --action_env=CXX=/usr/bin/clang++-17

# clang requires the sandbox mode. Without this, `bazel build` generates an
# error like:
#
# ERROR: .../external/llvm-project/llvm/BUILD.bazel:2006:11: Compiling
# llvm/lib/CodeGenTypes/LowLevelType.cpp failed: undeclared inclusion(s) in
# rule '@llvm-project//llvm:CodeGenTypes':
# this rule is missing dependency declarations for the following files
# included by 'llvm/lib/CodeGenTypes/LowLevelType.cpp':
#   'bazel-out/k8-opt/bin/external/llvm-project/llvm/config.cppmap'
#   'bazel-out/k8-opt/bin/external/llvm-project/llvm/Demangle.cppmap'
#
# The sandbox mode requires all source files to be readable by others, as
# inside the docker we build PyTorch/XLA as root, not the original user who
# created the files. You can ensure this by either:
# 1. `umask 022` before running the `git clone` commands, or
# 2. after running the `git clone` commands, go to the workspace directory
#    and run `chmod -R o+rX .`. The X (capital X) is important: it gives
#    execute permission to directories, and to files only if they already have
#    execute permission for the owner (or group or others). This is generally
#    safer than o+rx.
build:clang --spawn_strategy=sandboxed

###########################################################################

# Make all symbols hidden, by default.
# Ref: https://github.com/pytorch/xla/pull/9693
#
# Without this flag, GCC was associating some variables as "global unique".
# Meaning that the linker would make sure that there was only one instance
# of that variable per process.
#
# Which Variables?
# ================
# - Function-local static variables
# - Inside an inline function (e.g. class member functions)
# - Inside a non-internal linkage context
#
# One example is: `protobuf` holds [a `ShutdownData`][1] which is function-local
# and static, living inside an inline function, which lives inside a class with
# external linkage.
#
# What Happens?
# =============
# [GCC marks those variables with STB_GNU_UNIQUE binding][2], making sure those
# variables are unique for the entire process.
#
# Alternatives
# ============
# There is also the `-fno-gnu-unique` option, which disables exactly that.
# However, I believe this is a good opportunity to make everything hidden, unless
# otherwise specified.
#
# [1]: https://github.com/protocolbuffers/protobuf/blob/13fe37f25f187b7a2e79faa962df6a69bdb3d5b8/src/google/protobuf/message_lite.cc#L776
# [2]: https://gcc.gnu.org/onlinedocs/gcc/Code-Gen-Options.html#index-fno-gnu-unique
build:posix --copt=-fvisibility=hidden

# Silence signed/unsigned comparison warnings.
build:posix --copt=-Wno-sign-compare
# Compile both target and host C++ code as C++17.
build:posix --cxxopt=-std=c++17
build:posix --host_cxxopt=-std=c++17

# AVX code generation, applied to target and host compilation alike.
build:avx_posix --copt=-mavx --host_copt=-mavx

build:avx_linux --copt=-mavx --host_copt=-mavx

# `-march=native` for target and host compilation.
build:native_arch_posix --copt=-march=native --host_copt=-march=native

# Opt-in feature configs; each one sets a single `name=value` define.
build:mkl_open_source_only --define=tensorflow_mkldnn_contraction_kernel=1

build:acl --define=build_with_acl=true

build:nonccl --define=no_nccl_support=true

# The linux config includes everything from the posix config.
build:linux --config=posix

# Suppress all warning messages.
# The output filter is a pattern that no target is expected to match.
build:short_logs --output_filter=DONT_MATCH_ANYTHING

# NOTE(review): the next line is commented out; presumably an alternative way
# of enabling TPU support — confirm whether it is still needed.
#build:tpu --@xla//xla/python:enable_tpu=true
build:tpu --define=with_tpu_support=true

# Run tests serially with TPU (only 1 device is available).
test:tpu --local_test_jobs=1

#########################################################################
# RBE config options below.
# Turns on remote config.
common --experimental_repo_remote_exec

# Environment variables used in testing are inherited from the caller's
# environment; one variable per line.
test --test_env=TPU_NUM_DEVICES
test --test_env=CPU_NUM_DEVICES
test --test_env=XRT_LOCAL_WORKER
test --test_env=XRT_TPU_CONFIG
test --test_env=XRT_DEVICE_MAP
test --test_env=XRT_WORKERS
test --test_env=XRT_MESH_SERVICE_ADDRESS
test --test_env=XRT_SHARD_WORLD_SIZE
test --test_env=XRT_MULTI_PROCESSING_DEVICE
test --test_env=XRT_HOST_ORDINAL
test --test_env=XRT_SHARD_ORDINAL
test --test_env=XRT_START_LOCAL_SERVER
test --test_env=TPUVM_MODE
test --test_env=PJRT_DEVICE
test --test_env=PJRT_TPU_MAX_INFLIGHT_COMPUTATIONS
test --test_env=PJRT_CPU_ASYNC_CLIENT
test --test_env=TPU_LIBRARY_PATH
test --test_env=PJRT_DIST_SERVICE_ADDR
test --test_env=PJRT_LOCAL_PROCESS_RANK

# Proper integration with XLA depends on this variable reaching the tests.
test --test_env=XLA_EXPERIMENTAL

# Tests run using installed wheels and need `libpython`, which is found
# through this path.
test --test_env=LD_LIBRARY_PATH

# Works around targets being configured differently because of `test_filter`.
# https://github.com/bazelbuild/bazel/issues/6842
test --trim_test_configuration=false

# Keep the environment variables stable so that cache misses are minimized
# (src and env affect cache keys).
build --incompatible_strict_action_env=true

# Local builds do not upload their results to the cache by default.
build --remote_upload_local_results=false

# Opt-in config: local builds that use the remote cache.
build:remote_cache --remote_cache=grpcs://remotebuildexecution.googleapis.com
build:remote_cache --remote_instance_name=projects/tpu-pytorch/instances/default_instance
build:remote_cache --google_default_credentials=true
build:remote_cache --remote_upload_local_results=true
build:remote_cache --bes_backend=buildeventservice.googleapis.com
build:remote_cache --bes_upload_mode=fully_async
build:remote_cache --bes_results_url=https://source.cloud.google.com/results/invocations
build:remote_cache --bes_instance_name=tpu-pytorch
# On longer builds, BES can cause a non-zero exit from bazel.
build:remote_cache --bes_timeout=600s

# Try to keep the data transferred between bazel and the remote workers
# to a minimum:
build:remote_cache --remote_download_toplevel
#########################################################################

# Load rc file with user-specific options.
# `try-import` does not fail when the file is absent.
# NOTE(review): options listed after this line are read later than the user's
# file — confirm that taking precedence over user settings for the same flag
# is intended.
try-import %workspace%/.bazelrc.user

# Compile database generation config.
# Turns the `layering_check` feature off.
build:compdb --features=-layering_check

# Compiling tests requires Java.
build --java_runtime_version=remotejdk_11

# Coverage setup.
# Only test targets are built, and the `coverage` config below is applied.
coverage --build_tests_only
coverage --config=coverage
# Instrument //torch_xla and //third_party; leave //test out.
coverage --instrumentation_filter="//torch_xla[/:],//third_party[/:],-//test[/:]"
# Produce one combined report in lcov format.
coverage --combined_report=lcov
# Do not reuse cached test results.
coverage --nocache_test_results
build:coverage --strategy=CoverageReport=sandboxed,local

# Skip tests tagged `nocoverage`.
build:coverage --test_tag_filters=-nocoverage

############################################################################
############## TensorFlow .bazelrc greatest hits ###########################
############################################################################

# Options for the modular TF build.
build:dynamic_kernels --define=dynamic_loaded_kernels=true --copt=-DAUTOLOAD_DYNAMIC_KERNELS
build --define=tf_api_version=2
build --action_env=TF2_BEHAVIOR=1

# TF_SYSTEM_LIBS default locations, all derived from PREFIX.
build:linux --define=PREFIX=/usr
build:linux --define=LIBDIR=$(PREFIX)/lib
build:linux --define=INCLUDEDIR=$(PREFIX)/include
build:linux --define=PROTOBUF_INCLUDE_PATH=$(PREFIX)/include

build:linux --define=build_with_onednn_v2=true

# A small amount of kernels is linked dynamically on linux.
build:linux --config=dynamic_kernels

# Projects that use TensorFlow as part of a Bazel build get a monolithic
# build by default when their bazelrc says nothing. The monolithic build is
# forced explicitly here, because otherwise there are missing dependencies
# and linking fails.
build --define=framework_shared_object=false
build --define=tsl_protobuf_header_only=false

build --define=use_fast_cpp_protos=true --define=allow_oversize_protos=true

# XLA support is on by default.
build --define=with_xla_support=true

# Disable some xnnpack compilation flags which are not supported before gcc-13.
# These defines are part of the linux config only.
build:linux --define=xnn_enable_avxvnni=false
build:linux --define=xnn_enable_avx256vnni=false
build:linux --define=xnn_enable_avxvnniint8=false
build:linux --define=xnn_enable_avx512amx=false
build:linux --define=xnn_enable_avx512fp16=false

# See https://github.com/bazelbuild/bazel/issues/7362 for information on what
# --incompatible_remove_legacy_whole_archive flag does.
# This flag is set to true in Bazel 1.0 and newer versions. We tried to migrate
# Tensorflow to the default, however test coverage wasn't enough to catch the
# errors.
# There is ongoing work on Bazel team's side to provide support for transitive
# shared libraries. As part of migrating to transitive shared libraries, we
# hope to provide a better mechanism for control over symbol exporting, and
# then tackle this issue again.
#
# TODO: Remove this line once TF doesn't depend on Bazel wrapping all library
# archives in -whole_archive -no_whole_archive.
build --noincompatible_remove_legacy_whole_archive

# cc_shared_library ensures no library is linked statically more than once.
# NOTE(review): the flag below is set to false, i.e. that behavior is turned
# off here — confirm this is intended.
build --experimental_link_static_libraries_once=false

# Do not risk cache corruption. See:
# https://github.com/bazelbuild/bazel/issues/3360
# Applied through the linux config only.
build:linux --experimental_guard_against_concurrent_changes

# Prevent regressions on this incompatible change.
# NOTE(review): this comment used to refer to two incompatible changes, but
# only one flag is set here.
# TODO: remove this flag when it is flipped in the default Bazel version TF uses.
build --incompatible_enforce_config_setting_visibility

# Most C++ compiler warnings are suppressed to keep the logs small, while a
# few specific warnings are still kept. The order of the options is kept
# as is.
build:linux --copt=-Wno-all
build:linux --copt=-Wno-extra
build:linux --copt=-Wno-deprecated
build:linux --copt=-Wno-deprecated-declarations
build:linux --copt=-Wno-ignored-attributes
build:linux --copt=-Wno-array-bounds
# On Linux, unused-result is enabled and treated as an error.
build:linux --copt=-Wunused-result
build:linux --copt=-Werror=unused-result
# On Linux, switch is enabled and treated as an error.
build:linux --copt=-Wswitch
build:linux --copt=-Werror=switch
# Needed when building with clang.
build:linux --copt=-Wno-error=unused-but-set-variable

# Only include debug info for files not under XLA.
# Files matching `external/xla/.*` are compiled with -g0 and -DNDEBUG instead.
build:dbg -c dbg
build:dbg --per_file_copt=external/xla/.*@-g0,-DNDEBUG