diff --git a/apport/packaging_impl/apt_dpkg.py b/apport/packaging_impl/apt_dpkg.py index c95a532a5..9436188d8 100644 --- a/apport/packaging_impl/apt_dpkg.py +++ b/apport/packaging_impl/apt_dpkg.py @@ -37,6 +37,7 @@ import platform import re import shutil +import sqlite3 import stat import subprocess import sys @@ -276,6 +277,142 @@ def _usr_merge_alternative(path: str) -> str | None: return None +class _Path2Package(Mapping[str, str]): + """Path to Debian package mapping. + + A backing SQLite database is open on __init__ and closed on object + deletion. The data is stored in unnormalized form for creation speed + and code simplicity. + + If database_file is set to `None` an in-memory database will be used. + """ + + def __init__(self, database_file: pathlib.Path | None = None) -> None: + self.database_file = database_file + self.connection = self._connect() + if ( + database_file is None + or not database_file.exists() + or database_file.stat().st_size == 0 + ): + self._create_tables() + + def __del__(self) -> None: + """Close the SQLite database connection on object deletion.""" + if hasattr(self, "connection"): + self.connection.close() + + def _connect(self) -> sqlite3.Connection: + """Opens a connection to the SQLite database file. + + If database_file is set to `None` an in-memory database will be used. 
+ """ + if self.database_file: + database = f"file://{self.database_file.absolute()}" + else: + database = ":memory:" + connection = sqlite3.connect(database) + if hasattr(connection, "autocommit"): + connection.autocommit = False + return connection + + def _create_tables(self) -> None: + """Create SQLite database tables.""" + cursor = self.connection.cursor() + cursor.execute( + "CREATE TABLE path_package(" + " path TEXT PRIMARY KEY UNIQUE NOT NULL," + " package TEXT NOT NULL)" + ) + self.connection.commit() + + def __getitem__(self, key: str) -> str: + cursor = self.connection.execute( + "SELECT package FROM path_package WHERE path = ?", (key,) + ) + found = cursor.fetchone() + if found is None: + raise KeyError(key) + return found[0] + + def __iter__(self) -> Iterator[str]: + cursor = self.connection.execute( + "SELECT path FROM path_package ORDER BY path ASC" + ) + while True: + found = cursor.fetchone() + if found is None: + return + yield found[0] + + def __len__(self) -> int: + cursor = self.connection.execute("SELECT COUNT(*) FROM path_package") + found = cursor.fetchone() + assert found is not None + return found[0] + + def __setitem__(self, key: str, value: str) -> None: + """Set new value in datadase. + + Warning: The new value is only inserted into the database but + not committed for better performance. A database commit needs + to be done to persist the change. + """ + self.connection.execute( + "INSERT INTO path_package VALUES(?, ?) " + "ON CONFLICT(path) DO UPDATE SET package=excluded.package", + (key, value), + ) + + def is_empty(self) -> bool: + """Check if the database is empty.""" + cursor = self.connection.execute("SELECT 1 FROM path_package LIMIT 1") + return cursor.fetchone() is None + + @staticmethod + def _insert_many(cursor: sqlite3.Cursor, path2pkg: Mapping[str, str]) -> None: + cursor.executemany( + "INSERT INTO path_package VALUES(?, ?) 
" + "ON CONFLICT(path) DO UPDATE SET package=excluded.package", + path2pkg.items(), + ) + + def update_from_contents_file( + self, contents_filename: str, dist: str, group_inserts: int = 100 + ) -> None: + """Update database with entries from the Contents file. + + Existing paths will be overwritten by new entries. + """ + cursor = self.connection.cursor() + path2pkg = {} + + path_exclude_pattern = re.compile( + r"^:|(boot|var|usr/(include|src|[^/]+/include" + r"|share/(doc|gocode|help|icons|locale|man|texlive)))/" + ) + with gzip.open(contents_filename, "rt") as contents: + if dist in {"trusty", "xenial"}: + # the first 32 lines are descriptive only for these + # releases + for _ in range(32): + next(contents) + + for line in contents: + if path_exclude_pattern.match(line): + continue + path, column2 = line.rsplit(maxsplit=1) + package = column2.split(",")[0].split("/")[-1] + + path2pkg[path] = package + if len(path2pkg) >= group_inserts: + self._insert_many(cursor, path2pkg) + path2pkg = {} + if path2pkg: + self._insert_many(cursor, path2pkg) + self.connection.commit() + + class _AptDpkgPackageInfo(PackageInfo): # pylint: disable=too-many-instance-attributes,too-many-public-methods """Concrete apport.packaging.PackageInfo class implementation for @@ -289,7 +426,7 @@ def __init__(self) -> None: self._contents_dir: str | None = None self._mirror: str | None = None self._virtual_mapping_obj: dict[str, set[str]] | None = None - self._contents_mapping_obj: dict[bytes, bytes] | None = None + self._contents_mapping_obj: _Path2Package | None = None self._launchpad_base = "https://api.launchpad.net/devel" self._contents_update = False @@ -322,44 +459,18 @@ def _save_virtual_mapping(self, configdir: str) -> None: def _contents_mapping( self, configdir: str, release: str, arch: str - ) -> dict[bytes, bytes]: - if ( - self._contents_mapping_obj - and self._contents_mapping_obj[b"release"] == release.encode() - and self._contents_mapping_obj[b"arch"] == arch.encode() - ): - 
def _contents_mapping(
    self, configdir: str, release: str, arch: str
) -> _Path2Package:
    """Return the path to package mapping for the given release and arch.

    The mapping is backed by the SQLite database file
    ``contents_mapping-<release>-<arch>.sqlite3`` inside configdir. The
    most recently opened mapping is kept in ``_contents_mapping_obj``
    and reused as long as the same database file is requested.
    """
    mapping_file = (
        pathlib.Path(configdir) / f"contents_mapping-{release}-{arch}.sqlite3"
    )
    # Compare against None explicitly: the truth value of _Path2Package
    # comes from its __len__, which counts all rows in the database and
    # is False for an opened but still empty mapping.
    if self._contents_mapping_obj is not None:
        if self._contents_mapping_obj.database_file == mapping_file:
            return self._contents_mapping_obj
        # Release the mapping of the other release/arch before opening
        # the new one. Resetting to None (instead of deleting the
        # attribute) keeps the attribute defined if opening fails below.
        self._contents_mapping_obj = None
    self._contents_mapping_obj = _Path2Package(mapping_file)
    return self._contents_mapping_obj
def _get_file2pkg_mapping(
    self, map_cachedir: str, release: str, arch: str
) -> _Path2Package:
    """Return the path to package mapping, (re)building it when needed.

    The Contents file of every pocket of the release is requested via
    ``_get_contents_file``; pockets without a Contents file are skipped.
    The mapping is filled from the Contents files if it is empty or if
    ``_contents_update`` is set.
    """
    path2package = self._contents_mapping(map_cachedir, release, arch)
    # this is ordered by likelihood of installation with the most common
    # last
    # XXX - maybe we shouldn't check -security and -updates if it is the
    # devel release as they will be old and empty
    for pocket in ("-proposed", "", "-security", "-updates"):
        dist = f"{release}{pocket}"
        contents_filename = self._get_contents_file(map_cachedir, dist, arch)
        if contents_filename is None:
            continue
        # if the mapping is empty build it
        if path2package.is_empty():
            self._contents_update = True
        # if any of the Contents files were updated we need to update the
        # map because the ordering in which is created is important
        # (entries imported later overwrite existing paths)
        if self._contents_update:
            path2package.update_from_contents_file(contents_filename, dist)
    # The update of the mapping only needs to be done once. Without this
    # reset every later lookup would import all Contents files again.
    # NOTE(review): assumes the flag is only raised by _get_contents_file
    # and in the loop above - confirm nothing else relies on it staying set.
    self._contents_update = False
    return path2package
mapping only needs to be done once - self._contents_update = False if file[0] != "/": file = f"/{file}" - files = [file[1:].encode()] + files = [file[1:]] usrmerge_file = _usr_merge_alternative(file) if usrmerge_file: - files.append(usrmerge_file[1:].encode()) + files.append(usrmerge_file[1:]) for filename in files: try: - pkg = contents_mapping[filename].decode() + pkg = contents_mapping[filename] return pkg except KeyError: pass diff --git a/tests/unit/test_packaging_apt_dpkg.py b/tests/unit/test_packaging_apt_dpkg.py index e3657e718..0e282a7a6 100644 --- a/tests/unit/test_packaging_apt_dpkg.py +++ b/tests/unit/test_packaging_apt_dpkg.py @@ -9,6 +9,8 @@ """Unit tests for apport.packaging_impl.apt_dpkg.""" +import pathlib +import sqlite3 import tempfile import unittest import unittest.mock @@ -20,6 +22,7 @@ from apport.packaging_impl.apt_dpkg import ( _map_mirror_to_arch, _parse_deb822_sources, + _Path2Package, _read_mirror_file, impl, ) @@ -175,16 +178,15 @@ def test_read_mirror_file(self) -> None: ) @unittest.mock.patch.object(impl, "_get_file2pkg_mapping") - @unittest.mock.patch.object(impl, "_save_contents_mapping", MagicMock()) def test_get_file_package_uninstalled_usrmerge( self, _get_file2pkg_mapping_mock: MagicMock ) -> None: """get_file_package() on uninstalled usrmerge packages.""" # Data from Ubuntu 24.04 (noble) _get_file2pkg_mapping_mock.return_value = { - b"usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2": b"libc6", - b"usr/lib/x86_64-linux-gnu/libc.so.6": b"libc6", - b"usr/libx32/libc.so.6": b"libc6-x32", + "usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2": "libc6", + "usr/lib/x86_64-linux-gnu/libc.so.6": "libc6", + "usr/libx32/libc.so.6": "libc6-x32", } pkg = impl.get_file_package( @@ -200,7 +202,7 @@ def test_contents_skip_xenial_header(self) -> None: """Test _update_given_file2pkg_mapping skipping xenial Contents header.""" # Header taken from # http://archive.ubuntu.com/ubuntu/dists/xenial/Contents-amd64.gz - contents = b"""\ + contents = """\ This 
file maps each file available in the Ubuntu system to the package from which it originates. It includes packages from the DIST distribution for the ARCH architecture. @@ -237,22 +239,21 @@ def test_contents_skip_xenial_header(self) -> None: bin/afio multiverse/utils/afio bin/archdetect utils/archdetect-deb """ - file2pkg: dict[bytes, bytes] = {} + file2pkg = _Path2Package() open_mock = unittest.mock.mock_open(read_data=contents) with unittest.mock.patch("gzip.open", open_mock): - # pylint: disable-next=protected-access - impl._update_given_file2pkg_mapping(file2pkg, "/fake_Contents", "xenial") + file2pkg.update_from_contents_file("/fake_Contents", "xenial", 2) self.assertEqual( - file2pkg, {b"bin/afio": b"afio", b"bin/archdetect": b"archdetect-deb"} + dict(file2pkg), {"bin/afio": "afio", "bin/archdetect": "archdetect-deb"} ) - open_mock.assert_called_once_with("/fake_Contents", "rb") + open_mock.assert_called_once_with("/fake_Contents", "rt") def test_contents_path_filering(self) -> None: """Test _update_given_file2pkg_mapping to ignore unrelevant files.""" # Test content taken from # http://archive.ubuntu.com/ubuntu/dists/noble/Contents-amd64.gz - contents = b"""\ + contents = """\ bin/ip net/iproute2 boot/ipxe.efi admin/grub-ipxe etc/dput.cf devel/dput @@ -283,14 +284,13 @@ def test_contents_path_filering(self) -> None: var/lib/ieee-data/iab.txt net/ieee-data """ - file2pkg: dict[bytes, bytes] = {} + file2pkg = _Path2Package() open_mock = unittest.mock.mock_open(read_data=contents) with unittest.mock.patch("gzip.open", open_mock): - # pylint: disable-next=protected-access - impl._update_given_file2pkg_mapping(file2pkg, "Contents-amd64", "noble") + file2pkg.update_from_contents_file("Contents-amd64", "noble", 10) self.assertEqual( - {k.decode(): v.decode() for k, v in file2pkg.items()}, + dict(file2pkg), { "bin/ip": "iproute2", "etc/dput.cf": "dput", @@ -309,7 +309,7 @@ def test_contents_path_filering(self) -> None: "usr/share/dicom3tools/gen.so": "dicom3tools", 
}, ) - open_mock.assert_called_once_with("Contents-amd64", "rb") + open_mock.assert_called_once_with("Contents-amd64", "rt") def test_contents_parse_path_with_spaces(self) -> None: """Test _update_given_file2pkg_mapping to parse Contents file correctly.""" @@ -322,21 +322,71 @@ def test_contents_parse_path_with_spaces(self) -> None: "/__init__.py universe/python/ilorest\n" ) - file2pkg: dict[bytes, bytes] = {} - open_mock = unittest.mock.mock_open(read_data=contents.encode()) + file2pkg = _Path2Package() + open_mock = unittest.mock.mock_open(read_data=contents) with unittest.mock.patch("gzip.open", open_mock): - # pylint: disable-next=protected-access - impl._update_given_file2pkg_mapping(file2pkg, "Contents-amd64", "noble") + file2pkg.update_from_contents_file("Contents-amd64", "noble") self.assertEqual( - {k.decode(): v.decode() for k, v in file2pkg.items()}, + dict(file2pkg), { "usr/lib/iannix/Tools/JavaScript Library.js": "iannix", "usr/lib/python3/dist-packages/ilorest/extensions/BIOS COMMANDS" "/__init__.py": "ilorest", }, ) - open_mock.assert_called_once_with("Contents-amd64", "rb") + open_mock.assert_called_once_with("Contents-amd64", "rt") + + def test_path2package(self) -> None: + """Basic tests for _Path2Package class.""" + path2package = _Path2Package() + self.assertEqual(dict(path2package), {}) + self.assertEqual(len(path2package), 0) + self.assertTrue(path2package.is_empty()) + path2package["usr/bin/man"] = "man-db" + path2package["bin/ip"] = "iproute2" + self.assertEqual( + dict(path2package), {"bin/ip": "iproute2", "usr/bin/man": "man-db"} + ) + self.assertEqual(len(path2package), 2) + self.assertEqual(path2package["bin/ip"], "iproute2") + self.assertEqual(path2package.get("usr/bin/man"), "man-db") + self.assertIsNone(path2package.get("usr/src/broadcom-sta.tar.xz")) + self.assertFalse(path2package.is_empty()) + self.assertIn("bin/ip", path2package) + self.assertNotIn("usr/bin/git", path2package) + self.assertNotIn(42, path2package) + + def 
test_path2package_overwrite(self) -> None: + """Test _Path2Package to overwrite existing values.""" + path2package = _Path2Package() + path2package["path/to/file"] = "package1" + self.assertEqual(path2package["path/to/file"], "package1") + path2package["path/to/file"] = "package2" + self.assertEqual(path2package["path/to/file"], "package2") + + def test_path2package_create_and_reopen(self) -> None: + """Test _Path2Package for openening an existing database.""" + with tempfile.NamedTemporaryFile(suffix=".sqlite3") as db_file: + path2package = _Path2Package(pathlib.Path(db_file.name)) + self.assertEqual(dict(path2package), {}) + self.assertTrue(path2package.is_empty()) + path2package["bin/ip"] = "iproute2" + path2package.connection.commit() + del path2package + + path2package = _Path2Package(pathlib.Path(db_file.name)) + self.assertEqual(dict(path2package), {"bin/ip": "iproute2"}) + self.assertFalse(path2package.is_empty()) + + @unittest.mock.patch("sqlite3.connect") + def test_path2package_clean_deletion_on_failure( + self, connect_mock: MagicMock + ) -> None: + """Test clean _Path2Package deletion on __init__ failure.""" + connect_mock.side_effect = sqlite3.OperationalError + with self.assertRaises(sqlite3.OperationalError): + _Path2Package(pathlib.Path("/non-existing.sqlite3")) class TestPackaging(unittest.TestCase):