libalpm: don't download files from local servers

Message ID 20220109170417.405098-1-morganamilo@archlinux.org
State Under Review
Headers show
Series libalpm: don't download files from local servers | expand

Commit Message

morganamilo Jan. 9, 2022, 5:04 p.m. UTC
This causes file:// servers to be treated as if they were cache dirs
when checking if a package needs to be downloaded/read.

This stops packages being duplicated any time pacman "downloads"
a package from a local repository.
---
 lib/libalpm/package.c |  4 ++--
 lib/libalpm/sync.c    | 20 ++++++--------------
 lib/libalpm/util.c    | 35 +++++++++++++++++++++++++++++++++--
 lib/libalpm/util.h    |  3 ++-
 4 files changed, 43 insertions(+), 19 deletions(-)

Comments

Xiretza Jan. 9, 2022, 5:39 p.m. UTC | #1
On 09/01/2022 18.04, morganamilo wrote:
> This causes file:// servers to be treated as if they were cache dirs
> when checking if a package needs to be downloaded/read.
> 
> This stops packages being duplicated any time pacman "downloads"
> a package from a local repository.

Hm, I've always liked that behaviour, since it allows me to keep my local mirror thin (at 1-2 package revisions), while my cachedir contains older versions of only the packages I actually have/had installed. I realize this might be a bit "This breaks my workflow!", but I thought I'd at least bring it up :)

-xiretza
Lone_Wolf Jan. 9, 2022, 11:34 p.m. UTC | #2
On 09-01-2022 18:39, Xiretza wrote:
>
> On 09/01/2022 18.04, morganamilo wrote:
>> This causes file:// servers to be treated as if they were cache dirs
>> when checking if a package needs to be downloaded/read.
>>
>> This stops packages being duplicated any time pacman "downloads"
>> a package from a local repository.
>
> Hm, I've always liked that behaviour, since it allows me to keep my 
> local mirror thin (at 1-2 package revisions), while my cachedir 
> contains older versions of only the packages I actually have/had 
> installed. I realize this might be a bit "This breaks my workflow!", 
> but I thought I'd at least bring it up :)
>
> -xiretza


Similar for me, my local repo only holds the last version of packages 
and pacman cache is an excellent archive for older ones.

Guess I have to switch to an FTP server running on localhost (using bind 
mounts to link to the 'real' local repo?) once this gets implemented.


Lone_Wolf
Allan McRae Jan. 9, 2022, 11:49 p.m. UTC | #3
On 10/1/22 03:04, morganamilo wrote:
> This causes file:// servers to be treated as if they were cache dirs
> when checking if a package needs to be downloaded/read.
> 
> This stops packages being duplicated any time pacman "downloads"
> a package from a local repository.

Does adding the local repo as a secondary value to CacheDir not work?

A
Ruben Kelevra Jan. 10, 2022, 1:52 a.m. UTC | #4
Hey Morganamilo,

while the intention might be good, I think it's better to duplicate the
files:

- If the filesystem is slow or remote, this will slow things down, as we
will fetch the packages twice from it (once for checksumming and once for
extraction).
- If the filesystem is remote, it might become unavailable while pacman is
running.
- There's no progress indication while reading files, so to a user it might
look like pacman got stuck if it reads a remote filesystem over a slow connection.


Best regards

Ruben

On Sun, Jan 9, 2022, 18:04 morganamilo <morganamilo@archlinux.org> wrote:

> This causes file:// servers to be treated as if they were cache dirs
> when checking if a package needs to be downloaded/read.
>
> This stops packages being duplicated any time pacman "downloads"
> a package from a local repository.
> ---
>  lib/libalpm/package.c |  4 ++--
>  lib/libalpm/sync.c    | 20 ++++++--------------
>  lib/libalpm/util.c    | 35 +++++++++++++++++++++++++++++++++--
>  lib/libalpm/util.h    |  3 ++-
>  4 files changed, 43 insertions(+), 19 deletions(-)
>
> diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c
> index f837f84a..f805edfa 100644
> --- a/lib/libalpm/package.c
> +++ b/lib/libalpm/package.c
> @@ -57,7 +57,7 @@ int SYMEXPORT alpm_pkg_checkmd5sum(alpm_pkg_t *pkg)
>         ASSERT(pkg->origin == ALPM_PKG_FROM_SYNCDB,
>                         RET_ERR(pkg->handle, ALPM_ERR_WRONG_ARGS, -1));
>
> -       fpath = _alpm_filecache_find(pkg->handle, pkg->filename);
> +       fpath = _alpm_cache_find_pkg(pkg, 0);
>
>         retval = _alpm_test_checksum(fpath, pkg->md5sum,
> ALPM_PKG_VALIDATION_MD5SUM);
>
> @@ -283,7 +283,7 @@ int SYMEXPORT alpm_pkg_get_sig(alpm_pkg_t *pkg,
> unsigned char **sig, size_t *sig
>                 alpm_errno_t err;
>                 int ret = -1;
>
> -               pkgpath = _alpm_filecache_find(pkg->handle, pkg->filename);
> +               pkgpath = _alpm_cache_find_pkg(pkg, 0);
>                 if(!pkgpath) {
>                         GOTO_ERR(pkg->handle, ALPM_ERR_PKG_NOT_FOUND,
> cleanup);
>                 }
> diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
> index 36ad6242..f89d5c0d 100644
> --- a/lib/libalpm/sync.c
> +++ b/lib/libalpm/sync.c
> @@ -323,7 +323,7 @@ static int compute_download_size(alpm_pkg_t *newpkg)
>
>         ASSERT(newpkg->filename != NULL, RET_ERR(handle,
> ALPM_ERR_PKG_INVALID_NAME, -1));
>         fname = newpkg->filename;
> -       fpath = _alpm_filecache_find(handle, fname);
> +       fpath = _alpm_cache_find_pkg(newpkg, 0);
>
>         /* downloaded file exists, so there's nothing to grab */
>         if(fpath) {
> @@ -333,7 +333,7 @@ static int compute_download_size(alpm_pkg_t *newpkg)
>
>         CALLOC(fnamepart, strlen(fname) + 6, sizeof(char), return -1);
>         sprintf(fnamepart, "%s.part", fname);
> -       fpath = _alpm_filecache_find(handle, fnamepart);
> +       fpath = _alpm_cache_find_pkg(newpkg, 1);
>         if(fpath) {
>                 struct stat st;
>                 if(stat(fpath, &st) == 0) {
> @@ -737,21 +737,13 @@ static int find_dl_candidates(alpm_handle_t *handle,
> alpm_list_t **files)
>
>                         ASSERT(spkg->filename != NULL, RET_ERR(handle,
> ALPM_ERR_PKG_INVALID_NAME, -1));
>
> -                       need_download = spkg->download_size != 0 ||
> !_alpm_filecache_exists(handle, spkg->filename);
> +                       need_download = spkg->download_size != 0 ||
> !_alpm_cache_pkg_exists(spkg, 0);
>                         /* even if the package file in the cache we need
> to check for
>                          * accompanion *.sig file as well.
>                          * If *.sig is not cached then force download the
> package + its signature file.
>                          */
>                         if(!need_download && (siglevel &
> ALPM_SIG_PACKAGE)) {
> -                               char *sig_filename = NULL;
> -                               int len = strlen(spkg->filename) + 5;
> -
> -                               MALLOC(sig_filename, len, RET_ERR(handle,
> ALPM_ERR_MEMORY, -1));
> -                               snprintf(sig_filename, len, "%s.sig",
> spkg->filename);
> -
> -                               need_download =
> !_alpm_filecache_exists(handle, sig_filename);
> -
> -                               FREE(sig_filename);
> +                               need_download =
> !_alpm_cache_pkg_exists(spkg, 1);
>                         }
>
>                         if(need_download) {
> @@ -990,7 +982,7 @@ static int check_validity(alpm_handle_t *handle,
>                 }
>
>                 current_bytes += v.pkg->size;
> -               v.path = _alpm_filecache_find(handle, v.pkg->filename);
> +               v.path = _alpm_cache_find_pkg(v.pkg, 0);
>                 v.siglevel = alpm_db_get_siglevel(alpm_pkg_get_db(v.pkg));
>
>                 if(_alpm_pkg_validate_internal(handle, v.path, v.pkg,
> @@ -1080,7 +1072,7 @@ static int load_packages(alpm_handle_t *handle,
> alpm_list_t **data,
>                 }
>
>                 current_bytes += spkg->size;
> -               filepath = _alpm_filecache_find(handle, spkg->filename);
> +               filepath = _alpm_cache_find_pkg(spkg, 0);
>
>                 /* load the package file and replace pkgcache entry with
> it in the target list */
>                 /* TODO: alpm_pkg_get_db() will not work on this target
> anymore */
> diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c
> index 299d287e..f871dce0 100644
> --- a/lib/libalpm/util.c
> +++ b/lib/libalpm/util.c
> @@ -815,6 +815,37 @@ int _alpm_str_cmp(const void *s1, const void *s2)
>         return strcmp(s1, s2);
>  }
>
> +char *_alpm_cache_find_pkg(alpm_pkg_t *pkg, int sig) {
> +       alpm_handle_t *handle = pkg->handle;
> +       struct stat buf;
> +       alpm_list_t *servers = pkg->origin_data.db->servers;
> +       char *retpath;
> +       char filepath[PATH_MAX];
> +
> +       for(alpm_list_t *j = servers; j; j = j->next) {
> +               char *server = j->data;
> +
> +               if(strncmp("file://", server, strlen("file://")) == 0) {
> +                       int len = strlen(server) - strlen("file://") + 1 +
> strlen(pkg->filename) + 1;
> +
> +                       if(sig) {
> +                               len += strlen(".sig");
> +                               snprintf(filepath, len, "%s/%s", server +
> strlen("file://"), pkg->filename);
> +                       } else {
> +                               snprintf(filepath, len, "%s/%s.sig",
> server + strlen("file://"), pkg->filename);
> +                       }
> +
> +                       if(stat(filepath, &buf) == 0 &&
> S_ISREG(buf.st_mode)) {
> +                               STRDUP(retpath, filepath, RET_ERR(handle,
> ALPM_ERR_MEMORY, NULL));
> +                               _alpm_log(handle, ALPM_LOG_DEBUG, "found
> pkg in repo cache: %s\n", retpath);
> +                               return retpath;
> +                       }
> +               }
> +       }
> +
> +       return _alpm_filecache_find(handle, pkg->filename);
> +}
> +
>  /** Find a filename in a registered alpm cachedir.
>   * @param handle the context handle
>   * @param filename name of file to find
> @@ -846,10 +877,10 @@ char *_alpm_filecache_find(alpm_handle_t *handle,
> const char *filename)
>   * @param filename name of file to find
>   * @return 0 if the filename was not found, 1 otherwise
>   */
> -int _alpm_filecache_exists(alpm_handle_t *handle, const char *filename)
> +int _alpm_cache_pkg_exists(alpm_pkg_t *pkg, int sig)
>  {
>         int res;
> -       char *fpath = _alpm_filecache_find(handle, filename);
> +       char *fpath = _alpm_cache_find_pkg(pkg, sig);
>         res = (fpath != NULL);
>         FREE(fpath);
>         return res;
> diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h
> index b7297f81..5c1febef 100644
> --- a/lib/libalpm/util.h
> +++ b/lib/libalpm/util.h
> @@ -133,9 +133,10 @@ int _alpm_run_chroot(alpm_handle_t *handle, const
> char *cmd, char *const argv[],
>                 _alpm_cb_io in_cb, void *in_ctx);
>  int _alpm_ldconfig(alpm_handle_t *handle);
>  int _alpm_str_cmp(const void *s1, const void *s2);
> +char *_alpm_cache_find_pkg(alpm_pkg_t *pkg, int sig);
>  char *_alpm_filecache_find(alpm_handle_t *handle, const char *filename);
>  /* Checks whether a file exists in cache */
> -int _alpm_filecache_exists(alpm_handle_t *handle, const char *filename);
> +int _alpm_cache_pkg_exists(alpm_pkg_t *pkg, int sig);
>  const char *_alpm_filecache_setup(alpm_handle_t *handle);
>  /* Unlike many uses of alpm_pkgvalidation_t, _alpm_test_checksum expects
>   * an enum value rather than a bitfield. */
> --
> 2.34.1
>
>

Patch

diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c
index f837f84a..f805edfa 100644
--- a/lib/libalpm/package.c
+++ b/lib/libalpm/package.c
@@ -57,7 +57,7 @@  int SYMEXPORT alpm_pkg_checkmd5sum(alpm_pkg_t *pkg)
 	ASSERT(pkg->origin == ALPM_PKG_FROM_SYNCDB,
 			RET_ERR(pkg->handle, ALPM_ERR_WRONG_ARGS, -1));
 
-	fpath = _alpm_filecache_find(pkg->handle, pkg->filename);
+	fpath = _alpm_cache_find_pkg(pkg, 0);
 
 	retval = _alpm_test_checksum(fpath, pkg->md5sum, ALPM_PKG_VALIDATION_MD5SUM);
 
@@ -283,7 +283,7 @@  int SYMEXPORT alpm_pkg_get_sig(alpm_pkg_t *pkg, unsigned char **sig, size_t *sig
 		alpm_errno_t err;
 		int ret = -1;
 
-		pkgpath = _alpm_filecache_find(pkg->handle, pkg->filename);
+		pkgpath = _alpm_cache_find_pkg(pkg, 0);
 		if(!pkgpath) {
 			GOTO_ERR(pkg->handle, ALPM_ERR_PKG_NOT_FOUND, cleanup);
 		}
diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
index 36ad6242..f89d5c0d 100644
--- a/lib/libalpm/sync.c
+++ b/lib/libalpm/sync.c
@@ -323,7 +323,7 @@  static int compute_download_size(alpm_pkg_t *newpkg)
 
 	ASSERT(newpkg->filename != NULL, RET_ERR(handle, ALPM_ERR_PKG_INVALID_NAME, -1));
 	fname = newpkg->filename;
-	fpath = _alpm_filecache_find(handle, fname);
+	fpath = _alpm_cache_find_pkg(newpkg, 0);
 
 	/* downloaded file exists, so there's nothing to grab */
 	if(fpath) {
@@ -333,7 +333,7 @@  static int compute_download_size(alpm_pkg_t *newpkg)
 
 	CALLOC(fnamepart, strlen(fname) + 6, sizeof(char), return -1);
 	sprintf(fnamepart, "%s.part", fname);
-	fpath = _alpm_filecache_find(handle, fnamepart);
+	fpath = _alpm_cache_find_pkg(newpkg, 1);
 	if(fpath) {
 		struct stat st;
 		if(stat(fpath, &st) == 0) {
@@ -737,21 +737,13 @@  static int find_dl_candidates(alpm_handle_t *handle, alpm_list_t **files)
 
 			ASSERT(spkg->filename != NULL, RET_ERR(handle, ALPM_ERR_PKG_INVALID_NAME, -1));
 
-			need_download = spkg->download_size != 0 || !_alpm_filecache_exists(handle, spkg->filename);
+			need_download = spkg->download_size != 0 || !_alpm_cache_pkg_exists(spkg, 0);
 			/* even if the package file in the cache we need to check for
 			 * accompanion *.sig file as well.
 			 * If *.sig is not cached then force download the package + its signature file.
 			 */
 			if(!need_download && (siglevel & ALPM_SIG_PACKAGE)) {
-				char *sig_filename = NULL;
-				int len = strlen(spkg->filename) + 5;
-
-				MALLOC(sig_filename, len, RET_ERR(handle, ALPM_ERR_MEMORY, -1));
-				snprintf(sig_filename, len, "%s.sig", spkg->filename);
-
-				need_download = !_alpm_filecache_exists(handle, sig_filename);
-
-				FREE(sig_filename);
+				need_download = !_alpm_cache_pkg_exists(spkg, 1);
 			}
 
 			if(need_download) {
@@ -990,7 +982,7 @@  static int check_validity(alpm_handle_t *handle,
 		}
 
 		current_bytes += v.pkg->size;
-		v.path = _alpm_filecache_find(handle, v.pkg->filename);
+		v.path = _alpm_cache_find_pkg(v.pkg, 0);
 		v.siglevel = alpm_db_get_siglevel(alpm_pkg_get_db(v.pkg));
 
 		if(_alpm_pkg_validate_internal(handle, v.path, v.pkg,
@@ -1080,7 +1072,7 @@  static int load_packages(alpm_handle_t *handle, alpm_list_t **data,
 		}
 
 		current_bytes += spkg->size;
-		filepath = _alpm_filecache_find(handle, spkg->filename);
+		filepath = _alpm_cache_find_pkg(spkg, 0);
 
 		/* load the package file and replace pkgcache entry with it in the target list */
 		/* TODO: alpm_pkg_get_db() will not work on this target anymore */
diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c
index 299d287e..f871dce0 100644
--- a/lib/libalpm/util.c
+++ b/lib/libalpm/util.c
@@ -815,6 +815,37 @@  int _alpm_str_cmp(const void *s1, const void *s2)
 	return strcmp(s1, s2);
 }
 
+/** Find pkg (or its .sig if sig != 0) in a file:// server dir, then in the cachedirs; caller frees the result. */
+char *_alpm_cache_find_pkg(alpm_pkg_t *pkg, int sig) {
+	alpm_handle_t *handle = pkg->handle;
+	struct stat buf;
+	alpm_list_t *servers = pkg->origin_data.db->servers;
+	char *retpath;
+	char filepath[PATH_MAX], filename[PATH_MAX];
+	/* build the name once so that server dirs and cachedirs are searched for the same file */
+	int len = snprintf(filename, sizeof(filename), "%s%s", pkg->filename, sig ? ".sig" : "");
+
+	if(len < 0 || (size_t)len >= sizeof(filename)) {
+		return NULL;
+	}
+	for(alpm_list_t *j = servers; j; j = j->next) {
+		const char *server = j->data;
+
+		if(strncmp("file://", server, strlen("file://")) != 0) {
+			continue;
+		}
+		/* bound by the buffer size and skip truncated paths rather than stat() a wrong file */
+		len = snprintf(filepath, sizeof(filepath), "%s/%s", server + strlen("file://"), filename);
+		if(len >= 0 && (size_t)len < sizeof(filepath) && stat(filepath, &buf) == 0 && S_ISREG(buf.st_mode)) {
+			STRDUP(retpath, filepath, RET_ERR(handle, ALPM_ERR_MEMORY, NULL));
+			_alpm_log(handle, ALPM_LOG_DEBUG, "found pkg in repo cache: %s\n", retpath);
+			return retpath;
+		}
+	}
+
+	return _alpm_filecache_find(handle, filename);
+}
+
 /** Find a filename in a registered alpm cachedir.
  * @param handle the context handle
  * @param filename name of file to find
@@ -846,10 +877,10 @@  char *_alpm_filecache_find(alpm_handle_t *handle, const char *filename)
  * @param filename name of file to find
  * @return 0 if the filename was not found, 1 otherwise
  */
-int _alpm_filecache_exists(alpm_handle_t *handle, const char *filename)
+int _alpm_cache_pkg_exists(alpm_pkg_t *pkg, int sig)
 {
 	int res;
-	char *fpath = _alpm_filecache_find(handle, filename);
+	char *fpath = _alpm_cache_find_pkg(pkg, sig);
 	res = (fpath != NULL);
 	FREE(fpath);
 	return res;
diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h
index b7297f81..5c1febef 100644
--- a/lib/libalpm/util.h
+++ b/lib/libalpm/util.h
@@ -133,9 +133,10 @@  int _alpm_run_chroot(alpm_handle_t *handle, const char *cmd, char *const argv[],
 		_alpm_cb_io in_cb, void *in_ctx);
 int _alpm_ldconfig(alpm_handle_t *handle);
 int _alpm_str_cmp(const void *s1, const void *s2);
+char *_alpm_cache_find_pkg(alpm_pkg_t *pkg, int sig);
 char *_alpm_filecache_find(alpm_handle_t *handle, const char *filename);
 /* Checks whether a file exists in cache */
-int _alpm_filecache_exists(alpm_handle_t *handle, const char *filename);
+int _alpm_cache_pkg_exists(alpm_pkg_t *pkg, int sig);
 const char *_alpm_filecache_setup(alpm_handle_t *handle);
 /* Unlike many uses of alpm_pkgvalidation_t, _alpm_test_checksum expects
  * an enum value rather than a bitfield. */