Skip to content

Commit b412a51

Browse files
committed
Enhance index management in SQL ingestion scripts for improved performance
- Added new indexes on the `notes`, `note_comments`, and `note_comment_texts` tables to optimize query performance for monitoring and analytics. - Removed redundant indexes to streamline database operations, ensuring that existing primary keys and spatial indexes are utilized effectively. - Introduced partial indexes for recent updates and quality checks to enhance data retrieval efficiency for specific use cases. - Updated comments to clarify the benefits and usage of each index, improving maintainability and understanding of the database schema.
1 parent 1adb7d0 commit b412a51

File tree

5 files changed

+235
-180
lines changed

5 files changed

+235
-180
lines changed

sql/ingestion/create_indexes.sql

Lines changed: 57 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,26 +7,61 @@
77
-- See optimization_recommendations.md for details
88

99
-- Notes table indexes
10+
-- Index: idx_notes_updated_at (updated_at DESC)
11+
-- Benefits: Monitoring (data_freshness.sql - data freshness queries, optimized_queries/data_freshness_optimized.sql),
12+
-- Analytics (queries identifying recently updated notes for incremental processing)
13+
-- Used by: Monitoring data freshness queries, incremental ETL processing
1014
-- Descending index on notes.updated_at; skipped when an index of this name already exists.
CREATE INDEX IF NOT EXISTS idx_notes_updated_at
  ON notes (updated_at DESC);
15+
16+
-- Index: idx_notes_created_at (created_at DESC)
17+
-- Benefits: Monitoring (queries ordering by creation date descending),
18+
-- API (notes_created already indexes this column; a single-column B-tree can be scanned backward, so the explicit DESC mainly documents intent - verify the benefit with EXPLAIN)
19+
-- Used by: Monitoring queries with DESC ordering, API pagination with DESC
1120
-- Descending index on notes.created_at; skipped when an index of this name already exists.
CREATE INDEX IF NOT EXISTS idx_notes_created_at
  ON notes (created_at DESC);
12-
CREATE INDEX IF NOT EXISTS idx_notes_note_id ON notes(note_id);
13-
CREATE INDEX IF NOT EXISTS idx_notes_coordinates ON notes(latitude, longitude);
21+
22+
-- REMOVED: idx_notes_note_id - Redundant with PRIMARY KEY on note_id
23+
-- The PK already provides an index for note_id lookups and JOINs
24+
25+
-- REMOVED: idx_notes_coordinates - Redundant with notes_spatial GIST index
26+
-- The GIST spatial index (notes_spatial) is superior for all geographic queries including bounding boxes
1427

1528
-- Partial index for recent updates (optimizes freshness queries)
29+
-- Index: idx_notes_recent_updates (updated_at DESC) - Partial Index
30+
-- Benefits: Monitoring (data_freshness.sql - queries for recent updates in last 30 days)
31+
-- Used by: Monitoring queries that only need recent data (last 30 days), optimizes freshness checks
1632
-- A partial-index predicate may only call IMMUTABLE functions, so the direct form
-- "WHERE updated_at > NOW() - INTERVAL '30 days'" is rejected by PostgreSQL
-- (NOW() is STABLE). The cutoff is therefore computed once, when the index is
-- first created, and embedded in the predicate as a literal.
-- Because of IF NOT EXISTS the cutoff does not move on later runs: drop and
-- recreate the index periodically if it should keep tracking a 30-day window.
-- NOTE(review): the planner only uses a partial index when the query's WHERE clause
-- provably implies this predicate - confirm the freshness queries qualify (EXPLAIN).
DO $$
BEGIN
  EXECUTE format(
    'CREATE INDEX IF NOT EXISTS idx_notes_recent_updates '
      || 'ON notes (updated_at DESC) '
      || 'WHERE updated_at > %L',
    NOW() - INTERVAL '30 days'
  );
END $$;
1935

2036
-- Note comments table indexes
37+
-- Index: idx_note_comments_note_id (note_id)
38+
-- Benefits: Similar to note_comments_id but created by monitoring component
39+
-- API (noteService.ts - JOINs), Analytics (ETL staging JOINs)
40+
-- Used by: Same as note_comments_id - JOINs between notes and comments
2141
-- Index on note_comments.note_id; skipped when an index of this name already exists.
CREATE INDEX IF NOT EXISTS idx_note_comments_note_id
  ON note_comments (note_id);
42+
43+
-- Index: idx_note_comments_created_at (created_at DESC)
44+
-- Benefits: Monitoring (queries ordering comments by date descending)
45+
-- Similar to note_comments_created but with explicit DESC ordering
46+
-- Used by: Monitoring queries with DESC ordering, API queries ordering comments descending
2247
-- Descending index on note_comments.created_at; skipped when an index of this name already exists.
CREATE INDEX IF NOT EXISTS idx_note_comments_created_at
  ON note_comments (created_at DESC);
50+
51+
-- Index: idx_note_comments_note_id_created_at (note_id, created_at DESC)
52+
-- Benefits: Monitoring (queries getting most recent comments per note),
53+
-- API (optimizes queries ordering comments descending)
54+
-- Similar to note_comments_id_created but with explicit DESC ordering
55+
-- Used by: Queries getting most recent comments per note, API getNoteComments with DESC ordering
2556
-- Composite index: note_id first, then created_at in descending order within each note_id.
CREATE INDEX IF NOT EXISTS idx_note_comments_note_id_created_at
  ON note_comments (note_id, created_at DESC);
2859

2960
-- Note comment texts table indexes
61+
-- Index: idx_note_comment_texts_comment_id (comment_id)
62+
-- Benefits: API (noteService.ts:137 - JOIN using comment_id: LEFT JOIN note_comments_text ON nc.comment_id = nct.comment_id),
63+
-- Analytics (JOINs relating comments with their texts)
64+
-- Used by: JOINs between note_comments and note_comments_text using comment_id
3065
-- Index on the comment_id column of note_comment_texts.
-- NOTE(review): nearby comments in this file call the table "note_comments_text";
-- confirm which spelling the schema actually uses.
CREATE INDEX IF NOT EXISTS idx_note_comment_texts_comment_id
  ON note_comment_texts (comment_id);
@@ -36,20 +71,40 @@ CREATE INDEX IF NOT EXISTS idx_note_comment_texts_comment_id ON note_comment_tex
3671
-- Create the processing_log indexes only when that relation can be resolved.
-- to_regclass() looks the name up through the current search_path - the same way the
-- unqualified CREATE INDEX statements below do - and returns NULL when nothing matches.
-- The previous information_schema.tables check had no schema filter, so a
-- processing_log in a schema outside the search_path passed the check and the
-- CREATE INDEX statements then failed.
DO $$
BEGIN
  IF to_regclass('processing_log') IS NOT NULL THEN
    -- Index: idx_processing_log_execution_time (execution_time DESC)
    -- Benefits: Monitoring (queries ordering logs by execution time, performance analysis)
    -- Used by: Queries ordering logs by execution time, performance analysis
    CREATE INDEX IF NOT EXISTS idx_processing_log_execution_time
      ON processing_log (execution_time DESC);

    -- Index: idx_processing_log_status (status)
    -- Benefits: Monitoring (processing_status.sql - filters logs by status: success, failed, etc.)
    -- Used by: Monitoring queries finding failed processes, filtering by status
    -- NOTE(review): status is also the leading column of the two indexes below;
    -- check whether this single-column index is still needed.
    CREATE INDEX IF NOT EXISTS idx_processing_log_status
      ON processing_log (status);

    -- Index: idx_processing_log_status_execution_time (status, execution_time DESC)
    -- Benefits: Monitoring (queries filtering by status and ordering by execution time)
    -- Used by: Queries finding most recent failed processes, status-based analysis with time ordering
    CREATE INDEX IF NOT EXISTS idx_processing_log_status_execution_time
      ON processing_log (status, execution_time DESC);

    -- Covering index for common queries
    -- Index: idx_processing_log_covering (status, execution_time DESC, duration_seconds, notes_processed)
    -- Benefits: Monitoring (covering index includes all columns needed for common queries, avoids table access)
    -- Used by: Common monitoring queries that need status, time, duration, and notes_processed without accessing table
    CREATE INDEX IF NOT EXISTS idx_processing_log_covering
      ON processing_log (status, execution_time DESC, duration_seconds, notes_processed);
  END IF;
END $$;
4897

4998
-- Hash index for duplicate detection (if needed)
99+
-- Index: idx_notes_note_id_hash (note_id) - HASH
100+
-- Benefits: Monitoring (duplicate detection), Ingestion (quick note_id existence checks, though PK already exists)
101+
-- Used by: Duplicate detection queries, fast note_id lookups (though PK already provides this)
50102
-- Hash index on notes.note_id; skipped when an index of this name already exists.
CREATE INDEX IF NOT EXISTS idx_notes_note_id_hash
  ON notes USING HASH (note_id);
51103

52104
-- Partial index for quality checks
105+
-- Index: idx_notes_quality_check (note_id) - Partial Index
-- Benefits: Monitoring (data_quality.sql - identifies notes with data quality issues)
-- Used by: Quickly finding notes with missing coordinates or inconsistent timestamps (latitude IS NULL OR longitude IS NULL OR updated_at < created_at)
-- The indexed column is note_id: every other statement in this file addresses the
-- notes key as note_id, whereas the earlier definition of this index referenced "id".
-- NOTE(review): confirm the column name against the notes table definition.
CREATE INDEX IF NOT EXISTS idx_notes_quality_check
  ON notes (note_id)
  WHERE latitude IS NULL
     OR longitude IS NULL
     OR updated_at < created_at;

0 commit comments

Comments
 (0)