[pacman-dev] Support parallel download with xfercommand

Message ID 20201019231956.206070-1-lestofante88@gmail.com
State Under Review
Headers show
Series [pacman-dev] Support parallel download with xfercommand | expand

Commit Message

lesto fante Oct. 19, 2020, 11:19 p.m. UTC
Signed-off-by: lesto <lestofante88@gmail.com>
---
 lib/libalpm/alpm.h       | 10 ++++++++++
 lib/libalpm/dload.c      |  9 +++++++++
 lib/libalpm/handle.c     | 13 +++++++++++++
 lib/libalpm/handle.h     |  1 +
 src/pacman/conf.c        |  5 ++++-
 src/pacman/conf.h        |  1 +
 src/pacman/pacman-conf.c |  3 +++
 7 files changed, 41 insertions(+), 1 deletion(-)

Comments

Eli Schwartz Oct. 20, 2020, 2:01 a.m. UTC | #1
On 10/19/20 7:19 PM, lesto wrote:
> Signed-off-by: lesto <lestofante88@gmail.com>
> ---
>  lib/libalpm/alpm.h       | 10 ++++++++++
>  lib/libalpm/dload.c      |  9 +++++++++
>  lib/libalpm/handle.c     | 13 +++++++++++++
>  lib/libalpm/handle.h     |  1 +
>  src/pacman/conf.c        |  5 ++++-
>  src/pacman/conf.h        |  1 +
>  src/pacman/pacman-conf.c |  3 +++
>  7 files changed, 41 insertions(+), 1 deletion(-)


Your patch never even reads the XferLockCommand key to store its value
in "config". It's not clear what this is intended to do or how it
functions. Trivial check: try adding "XferLockCommand" to /etc/pacman.conf

warning: config file /etc/pacman.conf, line 22: directive
'XferLockCommand' in section 'options' not recognized.

Please fix this, and include an update to doc/pacman.conf.5.asciidoc
describing the new key and how to use it.


> 
> diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h
> index 614a530c..c2af60e8 100644
> --- a/lib/libalpm/alpm.h
> +++ b/lib/libalpm/alpm.h
> @@ -755,6 +755,11 @@ typedef void (*alpm_cb_totaldl)(off_t total);
>  typedef int (*alpm_cb_fetch)(const char *url, const char *localpath,
>  		int force);
>  
> +/** A callback for waiting for download of files
> + * @return 0 on success, -1 on error.
> + */
> +typedef int (*alpm_cb_fetch_lock)(void);
> +
>  /** Fetch a list of remote packages.
>   * @param handle the context handle
>   * @param urls list of package URLs to download
> @@ -787,6 +792,11 @@ alpm_cb_fetch alpm_option_get_fetchcb(alpm_handle_t *handle);
>  /** Sets the downloading callback. */
>  int alpm_option_set_fetchcb(alpm_handle_t *handle, alpm_cb_fetch cb);
>  
> +/** Returns the downloading lock callback. */
> +alpm_cb_fetch_lock alpm_option_get_fetch_lockcb(alpm_handle_t *handle);
> +/** Sets the downloading lock callback. */
> +int alpm_option_set_fetch_lockcb(alpm_handle_t *handle, alpm_cb_fetch_lock cb);
> +
>  /** Returns the callback used to report total download size. */
>  alpm_cb_totaldl alpm_option_get_totaldlcb(alpm_handle_t *handle);
>  /** Sets the callback used to report total download size. */
> diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
> index 673e769f..174d559d 100644
> --- a/lib/libalpm/dload.c
> +++ b/lib/libalpm/dload.c
> @@ -824,6 +824,15 @@ int _alpm_download(alpm_handle_t *handle,
>  				RET_ERR(handle, ALPM_ERR_EXTERNAL_DOWNLOAD, -1);
>  			}
>  		}
> +
> +		if (handle->fetch_lockcb != NULL) {
> +			// if fetch_lockcb is set, fetchcb is non-blocking; here we wait for all download to complete
> +			int ret = handle->fetch_lockcb();
> +			if (ret == -1) {
> +				RET_ERR(handle, ALPM_ERR_EXTERNAL_DOWNLOAD, -1);
> +			}
> +		}
> +
>  		return 0;
>  	}
>  }
> diff --git a/lib/libalpm/handle.c b/lib/libalpm/handle.c
> index 1310601a..683e678d 100644
> --- a/lib/libalpm/handle.c
> +++ b/lib/libalpm/handle.c
> @@ -174,6 +174,12 @@ alpm_cb_fetch SYMEXPORT alpm_option_get_fetchcb(alpm_handle_t *handle)
>  	return handle->fetchcb;
>  }
>  
> +alpm_cb_fetch_lock SYMEXPORT alpm_option_get_fetch_lockcb(alpm_handle_t *handle)
> +{
> +	CHECK_HANDLE(handle, return NULL);
> +	return handle->fetch_lockcb;
> +}
> +
>  alpm_cb_totaldl SYMEXPORT alpm_option_get_totaldlcb(alpm_handle_t *handle)
>  {
>  	CHECK_HANDLE(handle, return NULL);
> @@ -321,6 +327,13 @@ int SYMEXPORT alpm_option_set_fetchcb(alpm_handle_t *handle, alpm_cb_fetch cb)
>  	return 0;
>  }
>  
> +int SYMEXPORT alpm_option_set_fetch_lockcb(alpm_handle_t *handle, alpm_cb_fetch_lock cb)
> +{
> +	CHECK_HANDLE(handle, return -1);
> +	handle->fetch_lockcb = cb;
> +	return 0;
> +}
> +
>  int SYMEXPORT alpm_option_set_totaldlcb(alpm_handle_t *handle, alpm_cb_totaldl cb)
>  {
>  	CHECK_HANDLE(handle, return -1);
> diff --git a/lib/libalpm/handle.h b/lib/libalpm/handle.h
> index 9fef0fbf..dc00751b 100644
> --- a/lib/libalpm/handle.h
> +++ b/lib/libalpm/handle.h
> @@ -73,6 +73,7 @@ struct __alpm_handle_t {
>  	alpm_cb_download dlcb;      /* Download callback function */
>  	alpm_cb_totaldl totaldlcb;  /* Total download callback function */
>  	alpm_cb_fetch fetchcb;      /* Download file callback function */
> +	alpm_cb_fetch_lock fetch_lockcb;        /* Download lock file callback function */
>  	alpm_cb_event eventcb;
>  	alpm_cb_question questioncb;
>  	alpm_cb_progress progresscb;
> diff --git a/src/pacman/conf.c b/src/pacman/conf.c
> index 3a3ef605..53de73b8 100644
> --- a/src/pacman/conf.c
> +++ b/src/pacman/conf.c
> @@ -157,6 +157,7 @@ int config_free(config_t *oldconfig)
>  	FREELIST(oldconfig->hookdirs);
>  	FREELIST(oldconfig->cachedirs);
>  	free(oldconfig->xfercommand);
> +	free(oldconfig->xferlockcommand);
>  	free(oldconfig->print_format);
>  	free(oldconfig->arch);
>  	wordsplit_free(oldconfig->xfercommand_argv);
> @@ -319,7 +320,9 @@ static int download_with_xfercommand(const char *url, const char *localpath,
>  	for(i = 0; i <= config->xfercommand_argc; i++) {
>  		const char *val = config->xfercommand_argv[i];
>  		if(val && strcmp(val, "%o") == 0) {
> -			usepart = 1;
> +			if (config->xferlockcommand == NULL) {
> +				usepart = 1;
> +			}
>  			val = tempfile;
>  		} else if(val && strcmp(val, "%u") == 0) {
>  			val = url;
> diff --git a/src/pacman/conf.h b/src/pacman/conf.h
> index b8a451ad..1a9d637d 100644
> --- a/src/pacman/conf.h
> +++ b/src/pacman/conf.h
> @@ -130,6 +130,7 @@ typedef struct __config_t {
>  	char *xfercommand;
>  	char **xfercommand_argv;
>  	size_t xfercommand_argc;
> +	char *xferlockcommand;
>  
>  	/* our connection to libalpm */
>  	alpm_handle_t *handle;
> diff --git a/src/pacman/pacman-conf.c b/src/pacman/pacman-conf.c
> index 463badf1..7c4f4cc9 100644
> --- a/src/pacman/pacman-conf.c
> +++ b/src/pacman/pacman-conf.c
> @@ -259,6 +259,7 @@ static void dump_config(void)
>  
>  	show_str("Architecture", config->arch);
>  	show_str("XferCommand", config->xfercommand);
> +	show_str("XferLockCommand", config->xferlockcommand);
>  
>  	show_bool("UseSyslog", config->usesyslog);
>  	show_bool("Color", config->color);
> @@ -366,6 +367,8 @@ static int list_directives(void)
>  			show_str("Architecture", config->arch);
>  		} else if(strcasecmp(i->data, "XferCommand") == 0) {
>  			show_str("XferCommand", config->xfercommand);
> +		} else if(strcasecmp(i->data, "XferLockCommand") == 0) {
> +			show_str("XferLockCommand", config->xferlockcommand);
>  
>  		} else if(strcasecmp(i->data, "UseSyslog") == 0) {
>  			show_bool("UseSyslog", config->usesyslog);
>
lesto fante Oct. 20, 2020, 2:21 p.m. UTC | #2
The idea here is that if XferLockCommand is set, XferCommand can be
non-blocking; all invocations of XferCommand are executed and only
then XferLockCommand is executed to wait for all the downloads to
complete.

I am also thinking of changing XferLockCommand to XferWaitCommand, if
you have better names please let me know.

I will fix and resubmit.
BTW i noticed that master fails some check, is it safe to be used for
testing (in a virtual machine) or should i use a specific commit?

Il giorno mar 20 ott 2020 alle ore 04:01 Eli Schwartz
<eschwartz@archlinux.org> ha scritto:
>
> On 10/19/20 7:19 PM, lesto wrote:
> > Signed-off-by: lesto <lestofante88@gmail.com>
> > ---
> >  lib/libalpm/alpm.h       | 10 ++++++++++
> >  lib/libalpm/dload.c      |  9 +++++++++
> >  lib/libalpm/handle.c     | 13 +++++++++++++
> >  lib/libalpm/handle.h     |  1 +
> >  src/pacman/conf.c        |  5 ++++-
> >  src/pacman/conf.h        |  1 +
> >  src/pacman/pacman-conf.c |  3 +++
> >  7 files changed, 41 insertions(+), 1 deletion(-)
>
>
> Your patch never even reads the XferLockCommand key to store its value
> in "config". It's not clear what this is intended to do or how it
> functions. Trivial check: try adding "XferLockCommand" to /etc/pacman.conf
>
> warning: config file /etc/pacman.conf, line 22: directive
> 'XferLockCommand' in section 'options' not recognized.
>
> Please fix this, and include an update to doc/pacman.conf.5.asciidoc
> describing the new key and how to use it.
>
>
> >
> > diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h
> > index 614a530c..c2af60e8 100644
> > --- a/lib/libalpm/alpm.h
> > +++ b/lib/libalpm/alpm.h
> > @@ -755,6 +755,11 @@ typedef void (*alpm_cb_totaldl)(off_t total);
> >  typedef int (*alpm_cb_fetch)(const char *url, const char *localpath,
> >               int force);
> >
> > +/** A callback for waiting for download of files
> > + * @return 0 on success, -1 on error.
> > + */
> > +typedef int (*alpm_cb_fetch_lock)(void);
> > +
> >  /** Fetch a list of remote packages.
> >   * @param handle the context handle
> >   * @param urls list of package URLs to download
> > @@ -787,6 +792,11 @@ alpm_cb_fetch alpm_option_get_fetchcb(alpm_handle_t *handle);
> >  /** Sets the downloading callback. */
> >  int alpm_option_set_fetchcb(alpm_handle_t *handle, alpm_cb_fetch cb);
> >
> > +/** Returns the downloading lock callback. */
> > +alpm_cb_fetch_lock alpm_option_get_fetch_lockcb(alpm_handle_t *handle);
> > +/** Sets the downloading lock callback. */
> > +int alpm_option_set_fetch_lockcb(alpm_handle_t *handle, alpm_cb_fetch_lock cb);
> > +
> >  /** Returns the callback used to report total download size. */
> >  alpm_cb_totaldl alpm_option_get_totaldlcb(alpm_handle_t *handle);
> >  /** Sets the callback used to report total download size. */
> > diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
> > index 673e769f..174d559d 100644
> > --- a/lib/libalpm/dload.c
> > +++ b/lib/libalpm/dload.c
> > @@ -824,6 +824,15 @@ int _alpm_download(alpm_handle_t *handle,
> >                               RET_ERR(handle, ALPM_ERR_EXTERNAL_DOWNLOAD, -1);
> >                       }
> >               }
> > +
> > +             if (handle->fetch_lockcb != NULL) {
> > +                     // if fetch_lockcb is set, fetchcb is non-blocking; here we wait for all download to complete
> > +                     int ret = handle->fetch_lockcb();
> > +                     if (ret == -1) {
> > +                             RET_ERR(handle, ALPM_ERR_EXTERNAL_DOWNLOAD, -1);
> > +                     }
> > +             }
> > +
> >               return 0;
> >       }
> >  }
> > diff --git a/lib/libalpm/handle.c b/lib/libalpm/handle.c
> > index 1310601a..683e678d 100644
> > --- a/lib/libalpm/handle.c
> > +++ b/lib/libalpm/handle.c
> > @@ -174,6 +174,12 @@ alpm_cb_fetch SYMEXPORT alpm_option_get_fetchcb(alpm_handle_t *handle)
> >       return handle->fetchcb;
> >  }
> >
> > +alpm_cb_fetch_lock SYMEXPORT alpm_option_get_fetch_lockcb(alpm_handle_t *handle)
> > +{
> > +     CHECK_HANDLE(handle, return NULL);
> > +     return handle->fetch_lockcb;
> > +}
> > +
> >  alpm_cb_totaldl SYMEXPORT alpm_option_get_totaldlcb(alpm_handle_t *handle)
> >  {
> >       CHECK_HANDLE(handle, return NULL);
> > @@ -321,6 +327,13 @@ int SYMEXPORT alpm_option_set_fetchcb(alpm_handle_t *handle, alpm_cb_fetch cb)
> >       return 0;
> >  }
> >
> > +int SYMEXPORT alpm_option_set_fetch_lockcb(alpm_handle_t *handle, alpm_cb_fetch_lock cb)
> > +{
> > +     CHECK_HANDLE(handle, return -1);
> > +     handle->fetch_lockcb = cb;
> > +     return 0;
> > +}
> > +
> >  int SYMEXPORT alpm_option_set_totaldlcb(alpm_handle_t *handle, alpm_cb_totaldl cb)
> >  {
> >       CHECK_HANDLE(handle, return -1);
> > diff --git a/lib/libalpm/handle.h b/lib/libalpm/handle.h
> > index 9fef0fbf..dc00751b 100644
> > --- a/lib/libalpm/handle.h
> > +++ b/lib/libalpm/handle.h
> > @@ -73,6 +73,7 @@ struct __alpm_handle_t {
> >       alpm_cb_download dlcb;      /* Download callback function */
> >       alpm_cb_totaldl totaldlcb;  /* Total download callback function */
> >       alpm_cb_fetch fetchcb;      /* Download file callback function */
> > +     alpm_cb_fetch_lock fetch_lockcb;        /* Download lock file callback function */
> >       alpm_cb_event eventcb;
> >       alpm_cb_question questioncb;
> >       alpm_cb_progress progresscb;
> > diff --git a/src/pacman/conf.c b/src/pacman/conf.c
> > index 3a3ef605..53de73b8 100644
> > --- a/src/pacman/conf.c
> > +++ b/src/pacman/conf.c
> > @@ -157,6 +157,7 @@ int config_free(config_t *oldconfig)
> >       FREELIST(oldconfig->hookdirs);
> >       FREELIST(oldconfig->cachedirs);
> >       free(oldconfig->xfercommand);
> > +     free(oldconfig->xferlockcommand);
> >       free(oldconfig->print_format);
> >       free(oldconfig->arch);
> >       wordsplit_free(oldconfig->xfercommand_argv);
> > @@ -319,7 +320,9 @@ static int download_with_xfercommand(const char *url, const char *localpath,
> >       for(i = 0; i <= config->xfercommand_argc; i++) {
> >               const char *val = config->xfercommand_argv[i];
> >               if(val && strcmp(val, "%o") == 0) {
> > -                     usepart = 1;
> > +                     if (config->xferlockcommand == NULL) {
> > +                             usepart = 1;
> > +                     }
> >                       val = tempfile;
> >               } else if(val && strcmp(val, "%u") == 0) {
> >                       val = url;
> > diff --git a/src/pacman/conf.h b/src/pacman/conf.h
> > index b8a451ad..1a9d637d 100644
> > --- a/src/pacman/conf.h
> > +++ b/src/pacman/conf.h
> > @@ -130,6 +130,7 @@ typedef struct __config_t {
> >       char *xfercommand;
> >       char **xfercommand_argv;
> >       size_t xfercommand_argc;
> > +     char *xferlockcommand;
> >
> >       /* our connection to libalpm */
> >       alpm_handle_t *handle;
> > diff --git a/src/pacman/pacman-conf.c b/src/pacman/pacman-conf.c
> > index 463badf1..7c4f4cc9 100644
> > --- a/src/pacman/pacman-conf.c
> > +++ b/src/pacman/pacman-conf.c
> > @@ -259,6 +259,7 @@ static void dump_config(void)
> >
> >       show_str("Architecture", config->arch);
> >       show_str("XferCommand", config->xfercommand);
> > +     show_str("XferLockCommand", config->xferlockcommand);
> >
> >       show_bool("UseSyslog", config->usesyslog);
> >       show_bool("Color", config->color);
> > @@ -366,6 +367,8 @@ static int list_directives(void)
> >                       show_str("Architecture", config->arch);
> >               } else if(strcasecmp(i->data, "XferCommand") == 0) {
> >                       show_str("XferCommand", config->xfercommand);
> > +             } else if(strcasecmp(i->data, "XferLockCommand") == 0) {
> > +                     show_str("XferLockCommand", config->xferlockcommand);
> >
> >               } else if(strcasecmp(i->data, "UseSyslog") == 0) {
> >                       show_bool("UseSyslog", config->usesyslog);
> >
>
>
> --
> Eli Schwartz
> Bug Wrangler and Trusted User
>
Allan McRae Oct. 21, 2020, 12:24 a.m. UTC | #3
On 21/10/20 12:21 am, lesto fante wrote:
> The idea here is that if XferLockCommand is set, XferCommand can be
> non-blocking; all invocations of XferCommand are executed and only
> then XferLockCommand is executed to wait for all the downloads to
> complete.
> 

So if you have an update with 150 package, every single one starts
downloading at the same time?

Any implementation of this needs to respect the ParallelDownloads
configuration option.

> I am also thinking of changing XferLockCommand to XferWaitCommand, if
> you have better names please let me know.

Why even need this?  A user either has ParallelDownloads set to be
greater than 1, or does not.

> I will fix and resubmit.
> BTW i noticed that master fails some check, is it safe to be used for
> testing (in a virtual machine) or should i use a specific commit?
> 

There are no test failures.  My guess is you need to install fakechroot.

Also, please don't top post.

Allan
lesto fante Oct. 21, 2020, 1:54 a.m. UTC | #4
Hi,
The general idea is to make it possible to have multiple XferCommand
running in parallel.
Rather than trying to keep track of multiple XferCommand, I thought it
would be much easier to let XferCommand to fork/send request to a
daemon and die; then let pacman call a final script `XferLockCommand`
that will block until all download are completed, it will return the
classic 0 on success and -1 on error.

After the introduction of the parallel download, it has been given an
informal greenlight to submit a patch for XferCommand, so here I am.

As I choose simplicity, there is currently no way for pacman to know
how many downloads are happening in the background, their status,
which one did fail, just the final result success/error.

I see 2 major slowdown to my downloads;
- small file overhead
- mirror bandwidth

Currently I have a script that will pick all uncommented servers in
pacman list, divide them in groups of 3, and for each group download
one package. This make sure there is only one connection for server (i
assume server will not artificially limit the bandwidth, and if they
do i don't want to bypass their limit) while having multiple file in
download at the same time (good for small file overhead) and full
speed (multiple mirror for each file)

Also from your presentation it seems like ParallelDownloads will hit
only one server; it says sync issue, not really sure what you meant
there, afaik each package is downloaded with full versioning in the
name.

> So if you have an update with 150 package, every single one starts
> downloading at the same time?
> [...]
> Any implementation of this needs to respect the ParallelDownloads
> configuration option.

in this patch it is left to the XferCommand/XferLockCommand
implementation. Also the idea is that XferLockCommand may print on
stdout the information relative to the status of the download, and
those relaid back to the user (I may be wrong but this is the current
behaviour); this way the user will not be left wondering what is going
on.

-- Alternative 1 --
Add to XferLockCommands argument the number of maximum parallel
downloads; if the number is reached, the command block.
so the pseudocode became

    for each file
        XferCommand  //start one download
        XferLockCommand 10 // block if all 10 download slot are used
    XferLockCommand 0 // special case, block until all download are completed

-- Alternative 2 --
build an array of PID, each one refer to a XferCommand, but I am not
sure how much this would be portable and if there may be issues with
PID reuse; but would give pacman a bit more control on the running
process.

> Why even need this?  A user either has ParallelDownloads set to be
> greater than 1, or does not.

As far as I understand from the code in dload.c, ParallelDownloads
does not affect XferCommand, only one XferCommand is running and
expected to complete before the next is run.

> There are no test failures.  My guess is you need to install fakechroot.

ah yes, I can't find any docs on how the system should be built and
test run, so i just went for a "meson compile" and "meson test" in a
VM.

> Also, please don't top post.

whops, sorry
Allan McRae Oct. 21, 2020, 4 a.m. UTC | #5
On 21/10/20 11:54 am, lesto fante wrote:
> Hi,
> The general idea is to make it possible to have multiple XferCommand
> running in parallel.
> Rather than trying to keep track of multiple XferCommand, I thought it
> would be much easier to let XferCommand to fork/send request to a
> daemon and die; then let pacman call a final script `XferLockCommand`
> that will block until all download are completed, it will return the
> classic 0 on success and -1 on error.
> 
> After the introduction of the parallel download, it has been given an
> informal greenlight to submit a patch for XferCommand, so here I am.
> 
> As I choose simplicity, there is currently no way for pacman to know
> how many downloads are happening in the background, their status,
> which one did fail, just the final result success/error.

So, you are just passing the full list of files to download to a
download script.  Downloads are not managed by pacman at all?

Just add three more lines to your script:

pacman -Sy
pacman -Sup --noconfirm
<downloads here>
pacman -Su

I don't see the point of implementing parallel XferCommand like that
within pacman at all.

> I see 2 major slowdown to my downloads;
> - small file overhead
> - mirror bandwidth
> 
> Currently I have a script that will pick all uncommented servers in
> pacman list, divide them in groups of 3, and for each group download
> one package. This make sure there is only one connection for server (i
> assume server will not artificially limit the bandwidth, and if they
> do i don't want to bypass their limit) while having multiple file in
> download at the same time (good for small file overhead) and full
> speed (multiple mirror for each file)
> 
> Also from your presentation it seems like ParallelDownloads will hit
> only one server; it says sync issue, not really sure what you meant
> there, afaik each package is downloaded with full versioning in the
> name.

It currently does.  In the future that may change.   At the moment our
download error output is not great, and servers out of sync would result
in a lot of download errors.  We need to add logic to catch bad servers
and exclude them for future downloads, but fixing the output needs to
happen first.

>> So if you have an update with 150 package, every single one starts
>> downloading at the same time?
>> [...]
>> Any implementation of this needs to respect the ParallelDownloads
>> configuration option.
> 
> in this patch it is left to the XferCommand/XferLockCommand
> implementation. Also the idea is that XferLockCommand may print on
> stdout the information relative to the status of the download, and
> those relaid back to the user (I may be wrong but this is the current
> behaviour); this way the user will not be left wondering what is going
> on.
> 
> -- Alternative 1 --
> Add to XferLockCommands argument the number of maximum parallel
> downloads; if the number is reached, the command block.
> so the pseudocode became
> 
>     for each file
>         XferCommand  //start one download
>         XferLockCommand 10 // block if all 10 download slot are used
>     XferLockCommand 0 // special case, block until all download are completed
> 
> -- Alternative 2 --
> build an array of PID, each one refer to a XferCommand, but I am not
> sure how much this would be portable and if there may be issues with
> PID reuse; but would give pacman a bit more control on the running
> process.
> 

Pacman currently monitors a single download in a portable way.  I see no
reason it could not monitor more than one.  Then it could use
ParallelDownloads and provided some consistency across download methods.

>> Why even need this?  A user either has ParallelDownloads set to be
>> greater than 1, or does not.
> 
> As far as I understand from the code in dload.c, ParallelDownloads
> does not affect XferCommand, only one XferCommand is running and
> expected to complete before the next is run.

It does not...  I'd expect it would after an addition to XferCommand to
support parallel downloads.
lesto fante Oct. 21, 2020, 12:40 p.m. UTC | #6
> So, you are just passing the full list of files to download to a
> download script.  Downloads are not managed by pacman at all?

Exactly, my understanding is that with XferCommand we delegate an
external command to manage the downloads.
The advantage of having a dedicated wait command/parameter for the
last packet to download is that this final command can act as a
collection of information.

The only reason I do not call XferCommand with a list of all the
packages, server, and other options like ParalleDownalods as parameter
is because i fear to hit the parameter limit on some supported
OS/kernel I am not aware of;
but i feel maybe this is the best option, give to the download program
all he has to known.

The idea of having a single command or a dedicate wait
command/parameter imho is very important as this will help to have a
decent output of the external program progress, as it will be managed
by the program itself

> Just add three more lines to your script:
>
> pacman -Sy
> pacman -Sup --noconfirm
> <downloads here>
> pacman -Su

Yes, this is more or less what my actual script is doing.
I don't think it is the right solution as it became a multistage
process (error prone) or a pacman wrapper (don't like too much as this
is adding only a small modification, and will interfere with all the
other wrappers and possibly command line options) .

> I don't see the point of implementing parallel XferCommand like that
> within pacman at all.

Convenience and integration with other wrappers.

> Pacman currently monitors a single download in a portable way.  I see no
> reason it could not monitor more than one.  Then it could use
> ParallelDownloads and provided some consistency across download methods.

because I am not sure if tracking PID is portable and would require a
deeper modification of `fetchcb`, probably to return a PID handle.
Pacman could track every process alive/dead, but would not know any
other information like internal prograss.
If you think this is the best way, I will implement it.

> It does not...  I'd expect it would after an addition to XferCommand to
> support parallel downloads.

ok, so i can implement the tracking of the PID, but before writing any
more code I want to make sure this solution is acceptable;
and if it is, any implementation suggestion is welcome, if not, what
you think is the solution.

so to recap:

- solution 1 -
XferCommand called multiple times, non blocking, and a final
XferCommand with special parameter/XferLockCommand to wait for output.
We trust XferCommand to start only ParallelDownloads download.

- solution 2 -
We call XferCommand ParallelDownloads times, and wait for process to
complete before calling again

- solution 3 -
XferParallelCommand is added, it will be called with a list of all
packages, servers and options like ParallelDownloads.
We trust XferCommand to start only ParallelDownloads download.
lesto fante Oct. 31, 2020, 1:26 p.m. UTC | #7
> ok, so i can implement the tracking of the PID, but before writing any
> more code I want to make sure this solution is acceptable;
> and if it is, any implementation suggestion is welcome, if not, what
> you think is the solution.
>
> so to recap:
>
> - solution 1 -
> XferCommand called multiple times, non blocking, and a final
> XferCommand with special parameter/XferLockCommand to wait for output.
> We trust XferCommand to start only ParallelDownloads download.
>
> - solution 2 -
> We call XferCommand ParallelDownloads times, and wait for process to
> complete before calling again
>
> - solution 3 -
> XferParallelCommand is added, it will be called with a list of all
> packages, servers and options like ParallelDownloads.
> We trust XferCommand to start only ParallelDownloads download.

@Allan McRae any input on this one?

Patch

diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h
index 614a530c..c2af60e8 100644
--- a/lib/libalpm/alpm.h
+++ b/lib/libalpm/alpm.h
@@ -755,6 +755,11 @@  typedef void (*alpm_cb_totaldl)(off_t total);
 typedef int (*alpm_cb_fetch)(const char *url, const char *localpath,
 		int force);
 
+/** A callback for waiting for download of files
+ * @return 0 on success, -1 on error.
+ */
+typedef int (*alpm_cb_fetch_lock)(void);
+
 /** Fetch a list of remote packages.
  * @param handle the context handle
  * @param urls list of package URLs to download
@@ -787,6 +792,11 @@  alpm_cb_fetch alpm_option_get_fetchcb(alpm_handle_t *handle);
 /** Sets the downloading callback. */
 int alpm_option_set_fetchcb(alpm_handle_t *handle, alpm_cb_fetch cb);
 
+/** Returns the downloading lock callback. */
+alpm_cb_fetch_lock alpm_option_get_fetch_lockcb(alpm_handle_t *handle);
+/** Sets the downloading lock callback. */
+int alpm_option_set_fetch_lockcb(alpm_handle_t *handle, alpm_cb_fetch_lock cb);
+
 /** Returns the callback used to report total download size. */
 alpm_cb_totaldl alpm_option_get_totaldlcb(alpm_handle_t *handle);
 /** Sets the callback used to report total download size. */
diff --git a/lib/libalpm/dload.c b/lib/libalpm/dload.c
index 673e769f..174d559d 100644
--- a/lib/libalpm/dload.c
+++ b/lib/libalpm/dload.c
@@ -824,6 +824,15 @@  int _alpm_download(alpm_handle_t *handle,
 				RET_ERR(handle, ALPM_ERR_EXTERNAL_DOWNLOAD, -1);
 			}
 		}
+
+		if (handle->fetch_lockcb != NULL) {
+			// if fetch_lockcb is set, fetchcb is non-blocking; here we wait for all download to complete
+			int ret = handle->fetch_lockcb();
+			if (ret == -1) {
+				RET_ERR(handle, ALPM_ERR_EXTERNAL_DOWNLOAD, -1);
+			}
+		}
+
 		return 0;
 	}
 }
diff --git a/lib/libalpm/handle.c b/lib/libalpm/handle.c
index 1310601a..683e678d 100644
--- a/lib/libalpm/handle.c
+++ b/lib/libalpm/handle.c
@@ -174,6 +174,12 @@  alpm_cb_fetch SYMEXPORT alpm_option_get_fetchcb(alpm_handle_t *handle)
 	return handle->fetchcb;
 }
 
+alpm_cb_fetch_lock SYMEXPORT alpm_option_get_fetch_lockcb(alpm_handle_t *handle)
+{
+	CHECK_HANDLE(handle, return NULL);
+	return handle->fetch_lockcb;
+}
+
 alpm_cb_totaldl SYMEXPORT alpm_option_get_totaldlcb(alpm_handle_t *handle)
 {
 	CHECK_HANDLE(handle, return NULL);
@@ -321,6 +327,13 @@  int SYMEXPORT alpm_option_set_fetchcb(alpm_handle_t *handle, alpm_cb_fetch cb)
 	return 0;
 }
 
+int SYMEXPORT alpm_option_set_fetch_lockcb(alpm_handle_t *handle, alpm_cb_fetch_lock cb)
+{
+	CHECK_HANDLE(handle, return -1);
+	handle->fetch_lockcb = cb;
+	return 0;
+}
+
 int SYMEXPORT alpm_option_set_totaldlcb(alpm_handle_t *handle, alpm_cb_totaldl cb)
 {
 	CHECK_HANDLE(handle, return -1);
diff --git a/lib/libalpm/handle.h b/lib/libalpm/handle.h
index 9fef0fbf..dc00751b 100644
--- a/lib/libalpm/handle.h
+++ b/lib/libalpm/handle.h
@@ -73,6 +73,7 @@  struct __alpm_handle_t {
 	alpm_cb_download dlcb;      /* Download callback function */
 	alpm_cb_totaldl totaldlcb;  /* Total download callback function */
 	alpm_cb_fetch fetchcb;      /* Download file callback function */
+	alpm_cb_fetch_lock fetch_lockcb;        /* Download lock file callback function */
 	alpm_cb_event eventcb;
 	alpm_cb_question questioncb;
 	alpm_cb_progress progresscb;
diff --git a/src/pacman/conf.c b/src/pacman/conf.c
index 3a3ef605..53de73b8 100644
--- a/src/pacman/conf.c
+++ b/src/pacman/conf.c
@@ -157,6 +157,7 @@  int config_free(config_t *oldconfig)
 	FREELIST(oldconfig->hookdirs);
 	FREELIST(oldconfig->cachedirs);
 	free(oldconfig->xfercommand);
+	free(oldconfig->xferlockcommand);
 	free(oldconfig->print_format);
 	free(oldconfig->arch);
 	wordsplit_free(oldconfig->xfercommand_argv);
@@ -319,7 +320,9 @@  static int download_with_xfercommand(const char *url, const char *localpath,
 	for(i = 0; i <= config->xfercommand_argc; i++) {
 		const char *val = config->xfercommand_argv[i];
 		if(val && strcmp(val, "%o") == 0) {
-			usepart = 1;
+			if (config->xferlockcommand == NULL) {
+				usepart = 1;
+			}
 			val = tempfile;
 		} else if(val && strcmp(val, "%u") == 0) {
 			val = url;
diff --git a/src/pacman/conf.h b/src/pacman/conf.h
index b8a451ad..1a9d637d 100644
--- a/src/pacman/conf.h
+++ b/src/pacman/conf.h
@@ -130,6 +130,7 @@  typedef struct __config_t {
 	char *xfercommand;
 	char **xfercommand_argv;
 	size_t xfercommand_argc;
+	char *xferlockcommand;
 
 	/* our connection to libalpm */
 	alpm_handle_t *handle;
diff --git a/src/pacman/pacman-conf.c b/src/pacman/pacman-conf.c
index 463badf1..7c4f4cc9 100644
--- a/src/pacman/pacman-conf.c
+++ b/src/pacman/pacman-conf.c
@@ -259,6 +259,7 @@  static void dump_config(void)
 
 	show_str("Architecture", config->arch);
 	show_str("XferCommand", config->xfercommand);
+	show_str("XferLockCommand", config->xferlockcommand);
 
 	show_bool("UseSyslog", config->usesyslog);
 	show_bool("Color", config->color);
@@ -366,6 +367,8 @@  static int list_directives(void)
 			show_str("Architecture", config->arch);
 		} else if(strcasecmp(i->data, "XferCommand") == 0) {
 			show_str("XferCommand", config->xfercommand);
+		} else if(strcasecmp(i->data, "XferLockCommand") == 0) {
+			show_str("XferLockCommand", config->xferlockcommand);
 
 		} else if(strcasecmp(i->data, "UseSyslog") == 0) {
 			show_bool("UseSyslog", config->usesyslog);