Skip to content

Commit 75daff4

Browse files
committed
chore: Debug rest apis
Signed-off-by: ntkathole <nikhilkathole2683@gmail.com>
1 parent 02d5548 commit 75daff4

File tree

4 files changed

+206
-11
lines changed

4 files changed

+206
-11
lines changed

.github/workflows/registry-rest-api-tests.yml

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,46 @@ jobs:
160160
- name: Debug KIND Cluster when there is a failure
161161
if: failure()
162162
run: |
163-
kubectl get pods --all-namespaces
163+
echo "=== All pods ==="
164+
kubectl get pods --all-namespaces -o wide
165+
166+
echo ""
167+
echo "=== Test namespace pods ==="
168+
kubectl get pods -n test-ns-feast-rest -o wide 2>/dev/null || echo "Namespace not found"
169+
170+
echo ""
171+
echo "=== Services & Endpoints ==="
172+
kubectl get svc,endpoints -n test-ns-feast-rest 2>/dev/null || true
173+
174+
echo ""
175+
echo "=== Ingress ==="
176+
kubectl describe ingress -n test-ns-feast-rest 2>/dev/null || true
177+
178+
echo ""
179+
echo "=== FeatureStore CRs ==="
180+
kubectl get feast -n test-ns-feast-rest -o wide 2>/dev/null || true
181+
182+
echo ""
183+
echo "=== Non-normal events ==="
184+
kubectl get events -n test-ns-feast-rest --sort-by=.lastTimestamp --field-selector=type!=Normal 2>/dev/null || true
185+
186+
echo ""
187+
echo "=== Pod logs (last 80 lines each) ==="
188+
for pod in $(kubectl get pods -n test-ns-feast-rest --no-headers -o custom-columns=:metadata.name 2>/dev/null); do
189+
echo "--- Logs: $pod ---"
190+
kubectl logs "$pod" -n test-ns-feast-rest --tail=80 --all-containers 2>/dev/null || true
191+
done
192+
193+
echo ""
194+
echo "=== Ingress controller logs ==="
195+
kubectl logs -n ingress-nginx -l app.kubernetes.io/component=controller --tail=40 2>/dev/null || true
196+
197+
echo ""
198+
echo "=== Feast operator logs ==="
199+
kubectl logs -n feast-operator-system -l control-plane=controller-manager --tail=40 2>/dev/null || true
200+
201+
echo ""
202+
echo "=== Node status ==="
164203
kubectl describe nodes
165204
166205
- name: Clean up

sdk/python/feast/infra/registry/sql.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
update,
2424
)
2525
from sqlalchemy.engine import Engine
26+
from sqlalchemy.exc import IntegrityError
2627

2728
from feast import utils
2829
from feast.base_feature_view import BaseFeatureView
@@ -1028,8 +1029,15 @@ def _maybe_init_project_metadata(self, project):
10281029
"last_updated_timestamp": update_time,
10291030
"project_id": project,
10301031
}
1031-
insert_stmt = insert(feast_metadata).values(values)
1032-
conn.execute(insert_stmt)
1032+
try:
1033+
with conn.begin_nested():
1034+
conn.execute(insert(feast_metadata).values(values))
1035+
except IntegrityError:
1036+
logger.info(
1037+
"Project metadata for %s already initialized by "
1038+
"another process.",
1039+
project,
1040+
)
10331041

10341042
def _delete_object(
10351043
self,

sdk/python/tests/integration/rest_api/conftest.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
create_route,
1414
delete_namespace,
1515
deploy_and_validate_pod,
16+
dump_kubernetes_diagnostics,
1617
execPodCommand,
1718
get_pod_name_by_prefix,
1819
run_kubectl_apply_with_sed,
@@ -38,7 +39,12 @@ def get(self, endpoint, params=None):
3839
return requests.get(url, params=params, verify=False)
3940

4041

41-
def _wait_for_http_ready(route_url: str, timeout: int = 180, interval: int = 5) -> None:
42+
def _wait_for_http_ready(
43+
route_url: str,
44+
namespace: str = "test-ns-feast-rest",
45+
timeout: int = 300,
46+
interval: int = 5,
47+
) -> None:
4248
"""
4349
Poll the HTTP endpoint until it returns a non-502 response.
4450
@@ -49,6 +55,8 @@ def _wait_for_http_ready(route_url: str, timeout: int = 180, interval: int = 5)
4955
health_url = f"{route_url}/api/v1/projects"
5056
deadline = time.time() + timeout
5157
last_status = None
58+
last_body = None
59+
consecutive_errors = 0
5260

5361
print(
5462
f"\n Waiting for HTTP endpoint to become ready (timeout={timeout}s): {health_url}"
@@ -58,18 +66,37 @@ def _wait_for_http_ready(route_url: str, timeout: int = 180, interval: int = 5)
5866
try:
5967
resp = requests.get(health_url, timeout=10, verify=False)
6068
last_status = resp.status_code
69+
last_body = resp.text[:500] if resp.text else ""
6170
if resp.status_code != 502:
6271
print(f" HTTP endpoint is ready (status={resp.status_code})")
6372
return
73+
consecutive_errors += 1
6474
print(
6575
f" HTTP endpoint returned {resp.status_code}, retrying in {interval}s..."
6676
)
77+
if consecutive_errors % 6 == 0:
78+
elapsed = timeout - (deadline - time.time())
79+
print(
80+
f" Still waiting after ~{int(elapsed)}s. "
81+
f"Response body: {last_body[:200]}"
82+
)
6783
except requests.exceptions.RequestException as exc:
6884
last_status = str(exc)
85+
last_body = None
86+
consecutive_errors += 1
6987
print(f" HTTP request failed ({exc}), retrying in {interval}s...")
7088

7189
time.sleep(interval)
7290

91+
print(
92+
f"\n HTTP endpoint did not become ready within {timeout}s. "
93+
f"Last status: {last_status}"
94+
)
95+
if last_body:
96+
print(f" Last response body: {last_body[:500]}")
97+
98+
dump_kubernetes_diagnostics(namespace)
99+
73100
raise RuntimeError(
74101
f"HTTP endpoint {health_url} did not become ready within {timeout}s "
75102
f"(last status: {last_status})"
@@ -184,7 +211,7 @@ def feast_rest_client():
184211
# Wait for the HTTP endpoint to become ready before running tests.
185212
# Pod/CR readiness does not guarantee the backend is serving traffic;
186213
# the ingress may return 502 while the Feast server is still starting.
187-
_wait_for_http_ready(route_url)
214+
_wait_for_http_ready(route_url, namespace=namespace)
188215

189216
print(f"\n Connected to Feast REST at: {route_url}")
190217
yield FeastRestClient(route_url)

sdk/python/tests/integration/rest_api/support.py

Lines changed: 127 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,14 @@
44

55
from kubernetes import client
66

7+
DEFAULT_EXEC_TIMEOUT = 300
78

8-
def run_command(cmd, cwd=None, check=True):
9+
10+
def run_command(cmd, cwd=None, check=True, timeout=None):
911
print(f"Running command: {' '.join(cmd)}")
10-
result = subprocess.run(cmd, cwd=cwd, capture_output=True, text=True, check=True)
12+
result = subprocess.run(
13+
cmd, cwd=cwd, capture_output=True, text=True, check=True, timeout=timeout
14+
)
1115
if check and result.returncode != 0:
1216
print(result.stdout)
1317
print(result.stderr)
@@ -30,14 +34,23 @@ def delete_namespace(api_instance, namespace_name):
3034
api_instance.delete_namespace(namespace_name)
3135

3236

33-
def run_kubectl_command(args, timeout=None):
    """Run ``kubectl`` with ``args`` and return its stripped standard output.

    Args:
        args: List of command-line arguments appended after ``kubectl``.
        timeout: Seconds passed to ``subprocess.run``; ``None`` means no limit.

    Returns:
        The command's stdout with surrounding whitespace removed, or ``None``
        when the command timed out or exited with a non-zero status. Both
        failure cases are reported with ``print`` instead of being raised.
    """
    command = ["kubectl"] + args
    try:
        completed = subprocess.run(
            command,
            capture_output=True,
            text=True,
            check=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        print(f"TIMEOUT executing 'kubectl {' '.join(args)}' after {timeout}s")
        return None
    except subprocess.CalledProcessError as err:
        print(f"Error executing 'kubectl {' '.join(args)}': {err}")
        # stderr is captured, so surface it when kubectl wrote anything there.
        if err.stderr:
            print(f" stderr: {err.stderr.strip()}")
        return None
    return completed.stdout.strip()
4255

4356

@@ -236,8 +249,116 @@ def applyFeastProject(namespace, feast_project):
236249
return apply_output
237250

238251

239-
def execPodCommand(namespace, podName, command_args, timeout=DEFAULT_EXEC_TIMEOUT):
    """Run ``command_args`` inside a pod via ``kubectl exec``.

    Args:
        namespace: Namespace that contains the pod.
        podName: Name of the pod to exec into.
        command_args: List holding the command (and its arguments) to run
            after the ``--`` separator.
        timeout: Seconds forwarded to ``run_kubectl_command``.

    Returns:
        Whatever ``run_kubectl_command`` returns for the exec call; the value
        is also printed.
    """
    exec_output = run_kubectl_command(
        ["exec", podName, "-n", namespace, "--"] + command_args,
        timeout=timeout,
    )
    print("Output of args apply:\n", exec_output)
    return exec_output
257+
258+
259+
def _kubectl_print(args, timeout=30):
    """Diagnostic helper: run ``kubectl`` with ``args`` and echo its output.

    The call is delegated to ``run_kubectl_command`` with the given
    ``timeout``. The result is printed only when it is truthy and is always
    handed back to the caller unchanged.
    """
    result = run_kubectl_command(args, timeout=timeout)
    if not result:
        return result
    print(result)
    return result
265+
266+
267+
def dump_kubernetes_diagnostics(namespace):
    """Print a diagnostic report for ``namespace`` to help debug failures.

    The report is written with ``print`` and covers, in order: pod status,
    descriptions of pods whose phase is not ``Running``, services and
    endpoints, ingress objects, FeatureStore CRs, non-Normal events, the last
    50 log lines of every pod, and the last 30 log lines of the ingress-nginx
    controller. All cluster access goes through ``run_kubectl_command`` and
    ``_kubectl_print``.

    Args:
        namespace: Kubernetes namespace to inspect.
    """
    ns_flag = ["-n", namespace]
    banner = "=" * 60

    print(f"\n{banner}")
    print(f" KUBERNETES DIAGNOSTICS FOR NAMESPACE: {namespace}")
    print(f"{banner}\n")

    print("--- Pod status ---")
    _kubectl_print(["get", "pods", *ns_flag, "-o", "wide"])

    print("\n--- Pod descriptions (non-Running) ---")
    phase_listing = run_kubectl_command(
        [
            "get",
            "pods",
            *ns_flag,
            "--no-headers",
            "-o",
            "custom-columns=NAME:.metadata.name,STATUS:.status.phase",
        ],
        timeout=30,
    )
    # A failed or empty listing yields no rows, so the loop is simply skipped.
    for row in (phase_listing or "").splitlines():
        fields = row.split()
        if len(fields) != 2:
            continue
        name, phase = fields
        if phase == "Running":
            continue
        print(f"\n Describing non-running pod: {name}")
        _kubectl_print(["describe", "pod", name, *ns_flag])

    # Sections that are just a heading followed by fixed kubectl queries.
    simple_sections = (
        (
            "\n--- Services and endpoints ---",
            (
                ["get", "svc", *ns_flag],
                ["get", "endpoints", *ns_flag],
            ),
        ),
        (
            "\n--- Ingress ---",
            (
                ["get", "ingress", *ns_flag, "-o", "wide"],
                ["describe", "ingress", *ns_flag],
            ),
        ),
        (
            "\n--- FeatureStore CRs ---",
            (["get", "feast", *ns_flag, "-o", "wide"],),
        ),
        (
            "\n--- Warning/Error events ---",
            (
                [
                    "get",
                    "events",
                    *ns_flag,
                    "--sort-by=.lastTimestamp",
                    "--field-selector=type!=Normal",
                ],
            ),
        ),
    )
    for heading, queries in simple_sections:
        print(heading)
        for query in queries:
            _kubectl_print(query)

    print("\n--- Pod logs (last 50 lines each) ---")
    name_listing = run_kubectl_command(
        [
            "get",
            "pods",
            *ns_flag,
            "--no-headers",
            "-o",
            "custom-columns=:metadata.name",
        ],
        timeout=30,
    )
    for raw_name in (name_listing or "").splitlines():
        pod = raw_name.strip()
        if pod:
            print(f"\n --- Logs for pod: {pod} ---")
            _kubectl_print(
                ["logs", pod, *ns_flag, "--tail=50", "--all-containers"]
            )

    print("\n--- Ingress controller logs (last 30 lines) ---")
    _kubectl_print(
        [
            "logs",
            "-n",
            "ingress-nginx",
            "-l",
            "app.kubernetes.io/component=controller",
            "--tail=30",
        ]
    )

    print(f"\n{banner}")
    print(" END DIAGNOSTICS")
    print(f"{banner}\n")

0 commit comments

Comments
 (0)