Skip to content

Commit a41240d

Browse files
jeffcrouse and claude committed
Add scanner tests with hash-based relocation
- Add CC0 test MP3 fixtures from SoundSafari/CC0-1.0-Music
- Add comprehensive scanner tests including:
  - Hash computation consistency
  - Metadata extraction
  - File discovery in subdirectories
  - Hash-based relocation detection
  - Missing/recovered file handling
  - Multi-file relocation scenarios
- Add file_hash index migration for faster lookups
- Add pytest-asyncio dependency

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent d02f77e commit a41240d

File tree

14 files changed

+541
-8
lines changed

14 files changed

+541
-8
lines changed

backend/app/db/models.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ class Track(Base):
168168

169169
id: Mapped[UUID] = mapped_column(primary_key=True, default=uuid4)
170170
file_path: Mapped[str] = mapped_column(String(1000), unique=True, nullable=False)
171-
file_hash: Mapped[str] = mapped_column(String(64), nullable=False)
171+
file_hash: Mapped[str] = mapped_column(String(64), nullable=False, index=True)
172172

173173
# Basic metadata from tags
174174
title: Mapped[str | None] = mapped_column(String(500))

backend/app/services/scanner.py

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,15 @@ async def scan(self, library_path: Path, full_scan: bool = False) -> dict[str, A
237237
logger.info("Loading existing tracks from database...")
238238
existing_tracks = await self._get_existing_tracks()
239239
existing_paths = {t.file_path: t for t in existing_tracks}
240-
logger.info(f"Found {len(existing_tracks)} existing tracks in database")
240+
241+
# Build hash lookup for detecting relocated files (same content, different path)
242+
# First track wins when several tracks share a hash (duplicate content or, rarely, a true hash collision)
243+
existing_hashes: dict[str, Track] = {}
244+
for t in existing_tracks:
245+
if t.file_hash and t.file_hash not in existing_hashes:
246+
existing_hashes[t.file_hash] = t
247+
248+
logger.info(f"Found {len(existing_tracks)} existing tracks in database ({len(existing_hashes)} unique hashes)")
241249

242250
# Find all audio files (in thread pool to not block)
243251
logger.info(f"Discovering audio files in {library_path}...")
@@ -296,12 +304,29 @@ def discovery_progress(dirs_scanned: int, files_found: int):
296304
)
297305

298306
if path_str not in existing_paths:
299-
# New file
300-
logger.info(f"NEW: {file_path.name}")
301-
track = await self._create_track(file_path, file_hash, file_mtime)
302-
pending_analysis_ids.append(str(track.id))
303-
results["new"] += 1
304-
results["queued"] += 1
307+
# Path not found - check if same file exists at different path (by hash)
308+
if file_hash in existing_hashes:
309+
# Found by hash - this is a relocated file, update its path
310+
existing = existing_hashes[file_hash]
311+
old_path = existing.file_path
312+
logger.info(f"RELOCATED (by hash): {Path(old_path).name} -> {path_str}")
313+
existing.file_path = path_str
314+
existing.status = TrackStatus.ACTIVE
315+
existing.missing_since = None
316+
results["relocated"] += 1
317+
# Update path lookup so we don't process old path as missing
318+
existing_paths[path_str] = existing
319+
if old_path in existing_paths:
320+
del existing_paths[old_path]
321+
else:
322+
# Truly new file
323+
logger.info(f"NEW: {file_path.name}")
324+
track = await self._create_track(file_path, file_hash, file_mtime)
325+
pending_analysis_ids.append(str(track.id))
326+
results["new"] += 1
327+
results["queued"] += 1
328+
# Add to hash lookup so subsequent files with same hash are detected
329+
existing_hashes[file_hash] = track
305330
else:
306331
existing = existing_paths[path_str]
307332

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""add file_hash index
2+
3+
Revision ID: 54718ec827f6
4+
Revises: baseline
5+
Create Date: 2025-12-31 14:55:19.935331
6+
"""
7+
from typing import Sequence, Union
8+
9+
from alembic import op
10+
11+
# revision identifiers, used by Alembic.
12+
revision: str = '54718ec827f6'
13+
down_revision: Union[str, None] = 'baseline'
14+
branch_labels: Union[str, Sequence[str], None] = None
15+
depends_on: Union[str, Sequence[str], None] = None
16+
17+
18+
def upgrade() -> None:
19+
# Add a non-unique index (ix_tracks_file_hash) on tracks.file_hash for hash-based track matching
20+
op.create_index(op.f('ix_tracks_file_hash'), 'tracks', ['file_hash'], unique=False)
21+
22+
23+
def downgrade() -> None:
24+
# Drop the ix_tracks_file_hash index that upgrade() creates on the tracks table
op.drop_index(op.f('ix_tracks_file_hash'), table_name='tracks')

backend/pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ ignore_errors = true
103103

104104
[tool.pytest.ini_options]
105105
asyncio_mode = "auto"
106+
asyncio_default_fixture_loop_scope = "function"
106107
testpaths = ["tests"]
107108

108109
[tool.uv]
1.14 MB
Binary file not shown.
1.22 MB
Binary file not shown.
3.54 MB
Binary file not shown.
2.85 MB
Binary file not shown.
872 KB
Binary file not shown.
4.97 MB
Binary file not shown.

0 commit comments

Comments
 (0)