[pacman-dev,v2] Fallback to detached signatures during keyring check

Message ID 20200531025104.150497-1-anatol.pomozov@gmail.com
State Superseded, archived
Headers show
Series [pacman-dev,v2] Fallback to detached signatures during keyring check | expand

Commit Message

Anatol Pomozov May 31, 2020, 2:51 a.m. UTC
Pacman has a 'key in keyring' verification step that makes sure the signatures
have a valid keyid. Currently pacman parses embedded package signatures only.

Add a fallback to detached signatures. If embedded signature is missing then it
tries to read corresponding *.sig file and get keyid from there.

Verification:
  debug: found cached pkg: /var/cache/pacman/pkg/glib-networking-2.64.3-1-x86_64.pkg.tar.zst
  debug: found detached signature /var/cache/pacman/pkg/glib-networking-2.64.3-1-x86_64.pkg.tar.zst.sig with size 310
  debug: found signature key: A5E9288C4FA415FA
  debug: looking up key A5E9288C4FA415FA locally
  debug: key lookup success, key exists

Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
---
 lib/libalpm/alpm.h    | 10 ++++++++++
 lib/libalpm/package.c | 31 +++++++++++++++++++++++++++++++
 lib/libalpm/sync.c    | 17 ++++++++---------
 lib/libalpm/util.c    | 35 +++++++++++++++++++++++++++++++++++
 lib/libalpm/util.h    |  1 +
 5 files changed, 85 insertions(+), 9 deletions(-)

Comments

Anatol Pomozov May 31, 2020, 2:52 a.m. UTC | #1
Hi

V2 patch avoids incorrect "int" -> "size_t" conversion in
check_keyring() function.
Andrew Gregory May 31, 2020, 4:31 a.m. UTC | #2
On 05/30/20 at 07:51pm, Anatol Pomozov wrote:
> Pacman has a 'key in keyring' verification step that makes sure the signatures
> have a valid keyid. Currently pacman parses embedded package signatures only.
> 
> Add a fallback to detached signatures. If embedded signature is missing then it
> tries to read corresponding *.sig file and get keyid from there.
> 
> Verification:
>   debug: found cached pkg: /var/cache/pacman/pkg/glib-networking-2.64.3-1-x86_64.pkg.tar.zst
>   debug: found detached signature /var/cache/pacman/pkg/glib-networking-2.64.3-1-x86_64.pkg.tar.zst.sig with size 310
>   debug: found signature key: A5E9288C4FA415FA
>   debug: looking up key A5E9288C4FA415FA locally
>   debug: key lookup success, key exists
> 
> Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
> ---
>  lib/libalpm/alpm.h    | 10 ++++++++++
>  lib/libalpm/package.c | 31 +++++++++++++++++++++++++++++++
>  lib/libalpm/sync.c    | 17 ++++++++---------
>  lib/libalpm/util.c    | 35 +++++++++++++++++++++++++++++++++++
>  lib/libalpm/util.h    |  1 +
>  5 files changed, 85 insertions(+), 9 deletions(-)
> 
> diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h
> index c6a13273..9c5fff73 100644
> --- a/lib/libalpm/alpm.h
> +++ b/lib/libalpm/alpm.h
> @@ -1403,6 +1403,16 @@ alpm_db_t *alpm_pkg_get_db(alpm_pkg_t *pkg);
>   */
>  const char *alpm_pkg_get_base64_sig(alpm_pkg_t *pkg);
>  
> +/** Extracts package signature either from embedded package signature
> + * or if it is absent then reads data from detached signature file.
> + * @param pkg a pointer to package
> + * @param sig output parameter for signature data. Callee function allocates
> + * buffer needed for the signature data. Caller is responsible for
> + * freeing this buffer.
> + * @return size of a signature or negative number if error.
> + */
> +int alpm_pkg_get_sig(alpm_pkg_t *pkg, unsigned char **sig);
> +
>  /** Returns the method used to validate a package during install.
>   * @param pkg a pointer to package
>   * @return an enum member giving the validation method
> diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c
> index 5c5fa073..e0e4d987 100644
> --- a/lib/libalpm/package.c
> +++ b/lib/libalpm/package.c
> @@ -268,6 +268,37 @@ const char SYMEXPORT *alpm_pkg_get_base64_sig(alpm_pkg_t *pkg)
>  	return pkg->base64_sig;
>  }
>  
> +int SYMEXPORT alpm_pkg_get_sig(alpm_pkg_t *pkg, unsigned char **sig)

This API is weird, why are you returning an int when neither of the
successful values are int and can potentially overflow an int?  You're
returning the length of an allocated string, size_t is the appropriate
type.

> +{
> +	ASSERT(pkg != NULL, return -1);
> +	pkg->handle->pm_errno = ALPM_ERR_OK;

I don't like clearing pm_errno without a good reason.  It generally
doesn't serve any purpose and can get us into trouble if a function
gets called during cleanup somewhere.

> +	if(pkg->base64_sig) {
> +		size_t sig_len;
> +		int ret = alpm_decode_signature(pkg->base64_sig, sig, &sig_len);
> +		return ret == 0 ? (int)sig_len : -1;
> +	} else {
> +		char *pkgpath, *sigpath;
> +		int len;
> +
> +		pkgpath = _alpm_filecache_find(pkg->handle, pkg->filename);
> +		if(!pkgpath) {
> +			RET_ERR(pkg->handle, ALPM_ERR_PKG_NOT_FOUND, -1);
> +		}
> +		sigpath = _alpm_sigpath(pkg->handle, pkgpath);
> +		if(!sigpath || _alpm_access(pkg->handle, NULL, sigpath, R_OK)) {
> +			FREE(pkgpath);
> +			FREE(sigpath);
> +			RET_ERR(pkg->handle, ALPM_ERR_SIG_MISSING, -1);
> +		}
> +		len = _alpm_read_file(sigpath, sig);

You need to check for and handle failure.

> +		_alpm_log(pkg->handle, ALPM_LOG_DEBUG, "found detached signature %s with size %d\n", sigpath, len);
> +		FREE(pkgpath);
> +		FREE(sigpath);
> +		return len;
> +	}
> +}
> +
>  const char SYMEXPORT *alpm_pkg_get_arch(alpm_pkg_t *pkg)
>  {
>  	ASSERT(pkg != NULL, return NULL);
> diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
> index 8c01ad95..0ab0fe26 100644
> --- a/lib/libalpm/sync.c
> +++ b/lib/libalpm/sync.c
> @@ -880,18 +880,17 @@ static int check_keyring(alpm_handle_t *handle)
>  		}
>  
>  		level = alpm_db_get_siglevel(alpm_pkg_get_db(pkg));
> -		if((level & ALPM_SIG_PACKAGE) && pkg->base64_sig) {
> -			unsigned char *decoded_sigdata = NULL;
> -			size_t data_len;
> -			int decode_ret = alpm_decode_signature(pkg->base64_sig,
> -					&decoded_sigdata, &data_len);
> -			if(decode_ret == 0) {
> +		if((level & ALPM_SIG_PACKAGE)) {
> +			unsigned char *signature = NULL;
> +			int sig_len = alpm_pkg_get_sig(pkg, &signature);
> +			if(sig_len > 0) {
>  				alpm_list_t *keys = NULL;
> -				if(alpm_extract_keyid(handle, pkg->name, decoded_sigdata,
> -							data_len, &keys) == 0) {
> +				if(alpm_extract_keyid(handle, pkg->name, signature,
> +							sig_len, &keys) == 0) {
>  					alpm_list_t *k;
>  					for(k = keys; k; k = k->next) {
>  						char *key = k->data;
> +						_alpm_log(handle, ALPM_LOG_DEBUG, "found signature key: %s\n", key);
>  						if(!alpm_list_find(errors, key, key_cmp) &&
>  								_alpm_key_in_keychain(handle, key) == 0) {
>  							keyinfo = malloc(sizeof(struct keyinfo_t));
> @@ -905,8 +904,8 @@ static int check_keyring(alpm_handle_t *handle)
>  					}
>  					FREELIST(keys);
>  				}
> -				free(decoded_sigdata);
>  			}
> +			free(signature);
>  		}
>  	}
>  
> diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c
> index 76728eb4..3d57817b 100644
> --- a/lib/libalpm/util.c
> +++ b/lib/libalpm/util.c
> @@ -1489,3 +1489,38 @@ void _alpm_alloc_fail(size_t size)
>  {
>  	fprintf(stderr, "alloc failure: could not allocate %zu bytes\n", size);
>  }
> +
> +/** This functions reads file content.
> + *
> + * Memory buffer is allocated by the callee function. It is responsibility
> + * of the caller to free the buffer
> + *
> + * @param filepath filepath to read
> + * @param data pointer to output buffer
> + * @return size of the data read or negative number in case of error
> + */
> +int _alpm_read_file(const char *filepath, unsigned char **data)

Same as above.

> +{
> +	struct stat st;
> +	FILE *fp;
> +
> +	if((fp = fopen(filepath, "rb")) == NULL) {
> +		return -1;
> +	}
> +
> +	if(fstat(fileno(fp), &st) != 0) {
> +		fclose(fp);
> +		return -1;
> +	}
> +
> +	MALLOC(*data, st.st_size, fclose(fp); return -1);
> +
> +	if(fread(*data, st.st_size, 1, fp) != 1) {
> +		FREE(*data);
> +		fclose(fp);
> +		return -1;
> +	}
> +
> +	fclose(fp);
> +	return st.st_size;
> +}
> diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h
> index 4fc6e718..50a94489 100644
> --- a/lib/libalpm/util.h
> +++ b/lib/libalpm/util.h
> @@ -155,6 +155,7 @@ int _alpm_fnmatch_patterns(alpm_list_t *patterns, const char *string);
>  int _alpm_fnmatch(const void *pattern, const void *string);
>  void *_alpm_realloc(void **data, size_t *current, const size_t required);
>  void *_alpm_greedy_grow(void **data, size_t *current, const size_t required);
> +int _alpm_read_file(const char *filepath, unsigned char **data);
>  
>  #ifndef HAVE_STRSEP
>  char *strsep(char **, const char *);
> -- 
> 2.26.2
Anatol Pomozov May 31, 2020, 8:37 a.m. UTC | #3
Hi Andrew, thank you for the quick response

On Sat, May 30, 2020 at 9:31 PM Andrew Gregory
<andrew.gregory.8@gmail.com> wrote:
>
> On 05/30/20 at 07:51pm, Anatol Pomozov wrote:
> > Pacman has a 'key in keyring' verification step that makes sure the signatures
> > have a valid keyid. Currently pacman parses embedded package signatures only.
> >
> > Add a fallback to detached signatures. If embedded signature is missing then it
> > tries to read corresponding *.sig file and get keyid from there.
> >
> > Verification:
> >   debug: found cached pkg: /var/cache/pacman/pkg/glib-networking-2.64.3-1-x86_64.pkg.tar.zst
> >   debug: found detached signature /var/cache/pacman/pkg/glib-networking-2.64.3-1-x86_64.pkg.tar.zst.sig with size 310
> >   debug: found signature key: A5E9288C4FA415FA
> >   debug: looking up key A5E9288C4FA415FA locally
> >   debug: key lookup success, key exists
> >
> > Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
> > ---
> >  lib/libalpm/alpm.h    | 10 ++++++++++
> >  lib/libalpm/package.c | 31 +++++++++++++++++++++++++++++++
> >  lib/libalpm/sync.c    | 17 ++++++++---------
> >  lib/libalpm/util.c    | 35 +++++++++++++++++++++++++++++++++++
> >  lib/libalpm/util.h    |  1 +
> >  5 files changed, 85 insertions(+), 9 deletions(-)
> >
> > diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h
> > index c6a13273..9c5fff73 100644
> > --- a/lib/libalpm/alpm.h
> > +++ b/lib/libalpm/alpm.h
> > @@ -1403,6 +1403,16 @@ alpm_db_t *alpm_pkg_get_db(alpm_pkg_t *pkg);
> >   */
> >  const char *alpm_pkg_get_base64_sig(alpm_pkg_t *pkg);
> >
> > +/** Extracts package signature either from embedded package signature
> > + * or if it is absent then reads data from detached signature file.
> > + * @param pkg a pointer to package
> > + * @param sig output parameter for signature data. Callee function allocates
> > + * buffer needed for the signature data. Caller is responsible for
> > + * freeing this buffer.
> > + * @return size of a signature or negative number if error.
> > + */
> > +int alpm_pkg_get_sig(alpm_pkg_t *pkg, unsigned char **sig);
> > +
> >  /** Returns the method used to validate a package during install.
> >   * @param pkg a pointer to package
> >   * @return an enum member giving the validation method
> > diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c
> > index 5c5fa073..e0e4d987 100644
> > --- a/lib/libalpm/package.c
> > +++ b/lib/libalpm/package.c
> > @@ -268,6 +268,37 @@ const char SYMEXPORT *alpm_pkg_get_base64_sig(alpm_pkg_t *pkg)
> >       return pkg->base64_sig;
> >  }
> >
> > +int SYMEXPORT alpm_pkg_get_sig(alpm_pkg_t *pkg, unsigned char **sig)
>
> This API is weird, why are you returning an int when neither of the
> successful values are int and can potentially overflow an int?  You're
> returning the length of an allocated string, size_t is the appropriate
> type.

size_t is unsigned int. But this function returns positive length in
case of successful signature read or negative value in case of error.
Thus return value cannot be size_t.

>
> > +{
> > +     ASSERT(pkg != NULL, return -1);
> > +     pkg->handle->pm_errno = ALPM_ERR_OK;
>
> I don't like clearing pm_errno without a good reason.  It generally
> doesn't serve any purpose and can get us into trouble if a function
> gets called during cleanup somewhere.

I personally do not feel that "pm_errno = ALPM_ERR_OK" is useful here
and I am more than happy to remove it.

But I also I see this pattern is used a lot. Only this file
(lib/libalpm/package.c) has like 20 use-cases of it. e.g.

off_t SYMEXPORT alpm_pkg_get_isize(alpm_pkg_t *pkg)
{
        ASSERT(pkg != NULL, return -1);
        pkg->handle->pm_errno = ALPM_ERR_OK;
        return pkg->ops->get_isize(pkg);
}

So I was trying to follow a standard practice here. Or are you trying
to say that lib/libalpm/package.c examples I was looking at are
incorrect?

>
> > +     if(pkg->base64_sig) {
> > +             size_t sig_len;
> > +             int ret = alpm_decode_signature(pkg->base64_sig, sig, &sig_len);
> > +             return ret == 0 ? (int)sig_len : -1;
> > +     } else {
> > +             char *pkgpath, *sigpath;
> > +             int len;
> > +
> > +             pkgpath = _alpm_filecache_find(pkg->handle, pkg->filename);
> > +             if(!pkgpath) {
> > +                     RET_ERR(pkg->handle, ALPM_ERR_PKG_NOT_FOUND, -1);
> > +             }
> > +             sigpath = _alpm_sigpath(pkg->handle, pkgpath);
> > +             if(!sigpath || _alpm_access(pkg->handle, NULL, sigpath, R_OK)) {
> > +                     FREE(pkgpath);
> > +                     FREE(sigpath);
> > +                     RET_ERR(pkg->handle, ALPM_ERR_SIG_MISSING, -1);
> > +             }
> > +             len = _alpm_read_file(sigpath, sig);
>
> You need to check for and handle failure.

Do you mean to handle a file read failure? But both successful and
erroneous codepaths are the same - free() the resources and return
"len".

> > +             _alpm_log(pkg->handle, ALPM_LOG_DEBUG, "found detached signature %s with size %d\n", sigpath, len);
> > +             FREE(pkgpath);
> > +             FREE(sigpath);
> > +             return len;
> > +     }
> > +}
> > +
> >  const char SYMEXPORT *alpm_pkg_get_arch(alpm_pkg_t *pkg)
> >  {
> >       ASSERT(pkg != NULL, return NULL);
> > diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
> > index 8c01ad95..0ab0fe26 100644
> > --- a/lib/libalpm/sync.c
> > +++ b/lib/libalpm/sync.c
> > @@ -880,18 +880,17 @@ static int check_keyring(alpm_handle_t *handle)
> >               }
> >
> >               level = alpm_db_get_siglevel(alpm_pkg_get_db(pkg));
> > -             if((level & ALPM_SIG_PACKAGE) && pkg->base64_sig) {
> > -                     unsigned char *decoded_sigdata = NULL;
> > -                     size_t data_len;
> > -                     int decode_ret = alpm_decode_signature(pkg->base64_sig,
> > -                                     &decoded_sigdata, &data_len);
> > -                     if(decode_ret == 0) {
> > +             if((level & ALPM_SIG_PACKAGE)) {
> > +                     unsigned char *signature = NULL;
> > +                     int sig_len = alpm_pkg_get_sig(pkg, &signature);
> > +                     if(sig_len > 0) {
> >                               alpm_list_t *keys = NULL;
> > -                             if(alpm_extract_keyid(handle, pkg->name, decoded_sigdata,
> > -                                                     data_len, &keys) == 0) {
> > +                             if(alpm_extract_keyid(handle, pkg->name, signature,
> > +                                                     sig_len, &keys) == 0) {
> >                                       alpm_list_t *k;
> >                                       for(k = keys; k; k = k->next) {
> >                                               char *key = k->data;
> > +                                             _alpm_log(handle, ALPM_LOG_DEBUG, "found signature key: %s\n", key);
> >                                               if(!alpm_list_find(errors, key, key_cmp) &&
> >                                                               _alpm_key_in_keychain(handle, key) == 0) {
> >                                                       keyinfo = malloc(sizeof(struct keyinfo_t));
> > @@ -905,8 +904,8 @@ static int check_keyring(alpm_handle_t *handle)
> >                                       }
> >                                       FREELIST(keys);
> >                               }
> > -                             free(decoded_sigdata);
> >                       }
> > +                     free(signature);
> >               }
> >       }
> >
> > diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c
> > index 76728eb4..3d57817b 100644
> > --- a/lib/libalpm/util.c
> > +++ b/lib/libalpm/util.c
> > @@ -1489,3 +1489,38 @@ void _alpm_alloc_fail(size_t size)
> >  {
> >       fprintf(stderr, "alloc failure: could not allocate %zu bytes\n", size);
> >  }
> > +
> > +/** This functions reads file content.
> > + *
> > + * Memory buffer is allocated by the callee function. It is responsibility
> > + * of the caller to free the buffer
> > + *
> > + * @param filepath filepath to read
> > + * @param data pointer to output buffer
> > + * @return size of the data read or negative number in case of error
> > + */
> > +int _alpm_read_file(const char *filepath, unsigned char **data)
>
> Same as above.
>
> > +{
> > +     struct stat st;
> > +     FILE *fp;
> > +
> > +     if((fp = fopen(filepath, "rb")) == NULL) {
> > +             return -1;
> > +     }
> > +
> > +     if(fstat(fileno(fp), &st) != 0) {
> > +             fclose(fp);
> > +             return -1;
> > +     }
> > +
> > +     MALLOC(*data, st.st_size, fclose(fp); return -1);
> > +
> > +     if(fread(*data, st.st_size, 1, fp) != 1) {
> > +             FREE(*data);
> > +             fclose(fp);
> > +             return -1;
> > +     }
> > +
> > +     fclose(fp);
> > +     return st.st_size;
> > +}
> > diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h
> > index 4fc6e718..50a94489 100644
> > --- a/lib/libalpm/util.h
> > +++ b/lib/libalpm/util.h
> > @@ -155,6 +155,7 @@ int _alpm_fnmatch_patterns(alpm_list_t *patterns, const char *string);
> >  int _alpm_fnmatch(const void *pattern, const void *string);
> >  void *_alpm_realloc(void **data, size_t *current, const size_t required);
> >  void *_alpm_greedy_grow(void **data, size_t *current, const size_t required);
> > +int _alpm_read_file(const char *filepath, unsigned char **data);
> >
> >  #ifndef HAVE_STRSEP
> >  char *strsep(char **, const char *);
> > --
> > 2.26.2
Andrew Gregory May 31, 2020, 10:23 a.m. UTC | #4
On 05/31/20 at 01:37am, Anatol Pomozov wrote:
> Hi Andrew, thank you for the quick response
> 
> On Sat, May 30, 2020 at 9:31 PM Andrew Gregory
> <andrew.gregory.8@gmail.com> wrote:
> >
> > On 05/30/20 at 07:51pm, Anatol Pomozov wrote:
> > > Pacman has a 'key in keyring' verification step that makes sure the signatures
> > > have a valid keyid. Currently pacman parses embedded package signatures only.
> > >
> > > Add a fallback to detached signatures. If embedded signature is missing then it
> > > tries to read corresponding *.sig file and get keyid from there.
> > >
> > > Verification:
> > >   debug: found cached pkg: /var/cache/pacman/pkg/glib-networking-2.64.3-1-x86_64.pkg.tar.zst
> > >   debug: found detached signature /var/cache/pacman/pkg/glib-networking-2.64.3-1-x86_64.pkg.tar.zst.sig with size 310
> > >   debug: found signature key: A5E9288C4FA415FA
> > >   debug: looking up key A5E9288C4FA415FA locally
> > >   debug: key lookup success, key exists
> > >
> > > Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
> > > ---
> > >  lib/libalpm/alpm.h    | 10 ++++++++++
> > >  lib/libalpm/package.c | 31 +++++++++++++++++++++++++++++++
> > >  lib/libalpm/sync.c    | 17 ++++++++---------
> > >  lib/libalpm/util.c    | 35 +++++++++++++++++++++++++++++++++++
> > >  lib/libalpm/util.h    |  1 +
> > >  5 files changed, 85 insertions(+), 9 deletions(-)
> > >
> > > diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h
> > > index c6a13273..9c5fff73 100644
> > > --- a/lib/libalpm/alpm.h
> > > +++ b/lib/libalpm/alpm.h
> > > @@ -1403,6 +1403,16 @@ alpm_db_t *alpm_pkg_get_db(alpm_pkg_t *pkg);
> > >   */
> > >  const char *alpm_pkg_get_base64_sig(alpm_pkg_t *pkg);
> > >
> > > +/** Extracts package signature either from embedded package signature
> > > + * or if it is absent then reads data from detached signature file.
> > > + * @param pkg a pointer to package
> > > + * @param sig output parameter for signature data. Callee function allocates
> > > + * buffer needed for the signature data. Caller is responsible for
> > > + * freeing this buffer.
> > > + * @return size of a signature or negative number if error.
> > > + */
> > > +int alpm_pkg_get_sig(alpm_pkg_t *pkg, unsigned char **sig);
> > > +
> > >  /** Returns the method used to validate a package during install.
> > >   * @param pkg a pointer to package
> > >   * @return an enum member giving the validation method
> > > diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c
> > > index 5c5fa073..e0e4d987 100644
> > > --- a/lib/libalpm/package.c
> > > +++ b/lib/libalpm/package.c
> > > @@ -268,6 +268,37 @@ const char SYMEXPORT *alpm_pkg_get_base64_sig(alpm_pkg_t *pkg)
> > >       return pkg->base64_sig;
> > >  }
> > >
> > > +int SYMEXPORT alpm_pkg_get_sig(alpm_pkg_t *pkg, unsigned char **sig)
> >
> > This API is weird, why are you returning an int when neither of the
> > successful values are int and can potentially overflow an int?  You're
> > returning the length of an allocated string, size_t is the appropriate
> > type.
> 
> size_t is unsigned int. But this function returns positive length in
> case of successful signature read or negative value in case of error.
> Thus return value cannot be size_t.

I get that, but you chose that interface.  Why?  Surely it makes more
sense to either return 0 on error, since you treat a missing sig as an
error anyway, or take a size_t* arg like decode_signature.

> >
> > > +{
> > > +     ASSERT(pkg != NULL, return -1);
> > > +     pkg->handle->pm_errno = ALPM_ERR_OK;
> >
> > I don't like clearing pm_errno without a good reason.  It generally
> > doesn't serve any purpose and can get us into trouble if a function
> > gets called during cleanup somewhere.
> 
> I personally do not feel that "pm_errno = ALPM_ERR_OK" is useful here
> and I am more than happy to remove it.
> 
> But I also I see this pattern is used a lot. Only this file
> (lib/libalpm/package.c) has like 20 use-cases of it. e.g.
> 
> off_t SYMEXPORT alpm_pkg_get_isize(alpm_pkg_t *pkg)
> {
>         ASSERT(pkg != NULL, return -1);
>         pkg->handle->pm_errno = ALPM_ERR_OK;
>         return pkg->ops->get_isize(pkg);
> }
> 
> So I was trying to follow a standard practice here. Or are you trying
> to say that lib/libalpm/package.c examples I was looking at are
> incorrect?

I consider this an unfortunate anti-pattern.  It has bitten us in the
past by clearing pm_errno during cleanup just like I said.  I haven't
been motivated enough to go through and remove it so far, but I don't
want to add any more instances of it.

> > > +     if(pkg->base64_sig) {
> > > +             size_t sig_len;
> > > +             int ret = alpm_decode_signature(pkg->base64_sig, sig, &sig_len);
> > > +             return ret == 0 ? (int)sig_len : -1;
> > > +     } else {
> > > +             char *pkgpath, *sigpath;
> > > +             int len;
> > > +
> > > +             pkgpath = _alpm_filecache_find(pkg->handle, pkg->filename);
> > > +             if(!pkgpath) {
> > > +                     RET_ERR(pkg->handle, ALPM_ERR_PKG_NOT_FOUND, -1);
> > > +             }
> > > +             sigpath = _alpm_sigpath(pkg->handle, pkgpath);
> > > +             if(!sigpath || _alpm_access(pkg->handle, NULL, sigpath, R_OK)) {
> > > +                     FREE(pkgpath);
> > > +                     FREE(sigpath);
> > > +                     RET_ERR(pkg->handle, ALPM_ERR_SIG_MISSING, -1);
> > > +             }
> > > +             len = _alpm_read_file(sigpath, sig);
> >
> > You need to check for and handle failure.
> 
> Do you mean to handle a file read failure? But both successful and
> erroneous codepaths are the same - free() the resources and return
> "len".

What is pm_errno if read_file fails?

> > > +             _alpm_log(pkg->handle, ALPM_LOG_DEBUG, "found detached signature %s with size %d\n", sigpath, len);
> > > +             FREE(pkgpath);
> > > +             FREE(sigpath);
> > > +             return len;
> > > +     }
> > > +}
> > > +
> > >  const char SYMEXPORT *alpm_pkg_get_arch(alpm_pkg_t *pkg)
> > >  {
> > >       ASSERT(pkg != NULL, return NULL);
> > > diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
> > > index 8c01ad95..0ab0fe26 100644
> > > --- a/lib/libalpm/sync.c
> > > +++ b/lib/libalpm/sync.c
> > > @@ -880,18 +880,17 @@ static int check_keyring(alpm_handle_t *handle)
> > >               }
> > >
> > >               level = alpm_db_get_siglevel(alpm_pkg_get_db(pkg));
> > > -             if((level & ALPM_SIG_PACKAGE) && pkg->base64_sig) {
> > > -                     unsigned char *decoded_sigdata = NULL;
> > > -                     size_t data_len;
> > > -                     int decode_ret = alpm_decode_signature(pkg->base64_sig,
> > > -                                     &decoded_sigdata, &data_len);
> > > -                     if(decode_ret == 0) {
> > > +             if((level & ALPM_SIG_PACKAGE)) {
> > > +                     unsigned char *signature = NULL;
> > > +                     int sig_len = alpm_pkg_get_sig(pkg, &signature);
> > > +                     if(sig_len > 0) {
> > >                               alpm_list_t *keys = NULL;
> > > -                             if(alpm_extract_keyid(handle, pkg->name, decoded_sigdata,
> > > -                                                     data_len, &keys) == 0) {
> > > +                             if(alpm_extract_keyid(handle, pkg->name, signature,
> > > +                                                     sig_len, &keys) == 0) {
> > >                                       alpm_list_t *k;
> > >                                       for(k = keys; k; k = k->next) {
> > >                                               char *key = k->data;
> > > +                                             _alpm_log(handle, ALPM_LOG_DEBUG, "found signature key: %s\n", key);
> > >                                               if(!alpm_list_find(errors, key, key_cmp) &&
> > >                                                               _alpm_key_in_keychain(handle, key) == 0) {
> > >                                                       keyinfo = malloc(sizeof(struct keyinfo_t));
> > > @@ -905,8 +904,8 @@ static int check_keyring(alpm_handle_t *handle)
> > >                                       }
> > >                                       FREELIST(keys);
> > >                               }
> > > -                             free(decoded_sigdata);
> > >                       }
> > > +                     free(signature);
> > >               }
> > >       }
> > >
> > > diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c
> > > index 76728eb4..3d57817b 100644
> > > --- a/lib/libalpm/util.c
> > > +++ b/lib/libalpm/util.c
> > > @@ -1489,3 +1489,38 @@ void _alpm_alloc_fail(size_t size)
> > >  {
> > >       fprintf(stderr, "alloc failure: could not allocate %zu bytes\n", size);
> > >  }
> > > +
> > > +/** This functions reads file content.
> > > + *
> > > + * Memory buffer is allocated by the callee function. It is responsibility
> > > + * of the caller to free the buffer
> > > + *
> > > + * @param filepath filepath to read
> > > + * @param data pointer to output buffer
> > > + * @return size of the data read or negative number in case of error
> > > + */
> > > +int _alpm_read_file(const char *filepath, unsigned char **data)
> >
> > Same as above.
> >
> > > +{
> > > +     struct stat st;
> > > +     FILE *fp;
> > > +
> > > +     if((fp = fopen(filepath, "rb")) == NULL) {
> > > +             return -1;
> > > +     }
> > > +
> > > +     if(fstat(fileno(fp), &st) != 0) {
> > > +             fclose(fp);
> > > +             return -1;
> > > +     }
> > > +
> > > +     MALLOC(*data, st.st_size, fclose(fp); return -1);
> > > +
> > > +     if(fread(*data, st.st_size, 1, fp) != 1) {
> > > +             FREE(*data);
> > > +             fclose(fp);
> > > +             return -1;
> > > +     }
> > > +
> > > +     fclose(fp);
> > > +     return st.st_size;
> > > +}
> > > diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h
> > > index 4fc6e718..50a94489 100644
> > > --- a/lib/libalpm/util.h
> > > +++ b/lib/libalpm/util.h
> > > @@ -155,6 +155,7 @@ int _alpm_fnmatch_patterns(alpm_list_t *patterns, const char *string);
> > >  int _alpm_fnmatch(const void *pattern, const void *string);
> > >  void *_alpm_realloc(void **data, size_t *current, const size_t required);
> > >  void *_alpm_greedy_grow(void **data, size_t *current, const size_t required);
> > > +int _alpm_read_file(const char *filepath, unsigned char **data);
> > >
> > >  #ifndef HAVE_STRSEP
> > >  char *strsep(char **, const char *);
> > > --
> > > 2.26.2
Anatol Pomozov June 2, 2020, 6:44 a.m. UTC | #5
Hi

On Sun, May 31, 2020 at 3:24 AM Andrew Gregory
<andrew.gregory.8@gmail.com> wrote:
>
> On 05/31/20 at 01:37am, Anatol Pomozov wrote:
> > Hi Andrew, thank you for the quick response
> >
> > On Sat, May 30, 2020 at 9:31 PM Andrew Gregory
> > <andrew.gregory.8@gmail.com> wrote:
> > >
> > > On 05/30/20 at 07:51pm, Anatol Pomozov wrote:
> > > > Pacman has a 'key in keyring' verification step that makes sure the signatures
> > > > have a valid keyid. Currently pacman parses embedded package signatures only.
> > > >
> > > > Add a fallback to detached signatures. If embedded signature is missing then it
> > > > tries to read corresponding *.sig file and get keyid from there.
> > > >
> > > > Verification:
> > > >   debug: found cached pkg: /var/cache/pacman/pkg/glib-networking-2.64.3-1-x86_64.pkg.tar.zst
> > > >   debug: found detached signature /var/cache/pacman/pkg/glib-networking-2.64.3-1-x86_64.pkg.tar.zst.sig with size 310
> > > >   debug: found signature key: A5E9288C4FA415FA
> > > >   debug: looking up key A5E9288C4FA415FA locally
> > > >   debug: key lookup success, key exists
> > > >
> > > > Signed-off-by: Anatol Pomozov <anatol.pomozov@gmail.com>
> > > > ---
> > > >  lib/libalpm/alpm.h    | 10 ++++++++++
> > > >  lib/libalpm/package.c | 31 +++++++++++++++++++++++++++++++
> > > >  lib/libalpm/sync.c    | 17 ++++++++---------
> > > >  lib/libalpm/util.c    | 35 +++++++++++++++++++++++++++++++++++
> > > >  lib/libalpm/util.h    |  1 +
> > > >  5 files changed, 85 insertions(+), 9 deletions(-)
> > > >
> > > > diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h
> > > > index c6a13273..9c5fff73 100644
> > > > --- a/lib/libalpm/alpm.h
> > > > +++ b/lib/libalpm/alpm.h
> > > > @@ -1403,6 +1403,16 @@ alpm_db_t *alpm_pkg_get_db(alpm_pkg_t *pkg);
> > > >   */
> > > >  const char *alpm_pkg_get_base64_sig(alpm_pkg_t *pkg);
> > > >
> > > > +/** Extracts package signature either from embedded package signature
> > > > + * or if it is absent then reads data from detached signature file.
> > > > + * @param pkg a pointer to package
> > > > + * @param sig output parameter for signature data. Callee function allocates
> > > > + * buffer needed for the signature data. Caller is responsible for
> > > > + * freeing this buffer.
> > > > + * @return size of a signature or negative number if error.
> > > > + */
> > > > +int alpm_pkg_get_sig(alpm_pkg_t *pkg, unsigned char **sig);
> > > > +
> > > >  /** Returns the method used to validate a package during install.
> > > >   * @param pkg a pointer to package
> > > >   * @return an enum member giving the validation method
> > > > diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c
> > > > index 5c5fa073..e0e4d987 100644
> > > > --- a/lib/libalpm/package.c
> > > > +++ b/lib/libalpm/package.c
> > > > @@ -268,6 +268,37 @@ const char SYMEXPORT *alpm_pkg_get_base64_sig(alpm_pkg_t *pkg)
> > > >       return pkg->base64_sig;
> > > >  }
> > > >
> > > > +int SYMEXPORT alpm_pkg_get_sig(alpm_pkg_t *pkg, unsigned char **sig)
> > >
> > > This API is weird, why are you returning an int when neither of the
> > > successful values are int and can potentially overflow an int?  You're
> > > returning the length of an allocated string, size_t is the appropriate
> > > type.
> >
> > size_t is unsigned int. But this function returns positive length in
> > case of successful signature read or negative value in case of error.
> > Thus return value cannot be size_t.
>
> I get that, but you chose that interface.  Why?  Surely it makes more
> sense to either return 0 on error, since you treat a missing sig as an
> error anyway, or take a size_t* arg like decode_signature.

Okay I see what you are suggesting. Sure I can move "length" value to
a function parameter.

> > >
> > > > +{
> > > > +     ASSERT(pkg != NULL, return -1);
> > > > +     pkg->handle->pm_errno = ALPM_ERR_OK;
> > >
> > > I don't like clearing pm_errno without a good reason.  It generally
> > > doesn't serve any purpose and can get us into trouble if a function
> > > gets called during cleanup somewhere.
> >
> > I personally do not feel that "pm_errno = ALPM_ERR_OK" is useful here
> > and I am more than happy to remove it.
> >
> > But I also I see this pattern is used a lot. Only this file
> > (lib/libalpm/package.c) has like 20 use-cases of it. e.g.
> >
> > off_t SYMEXPORT alpm_pkg_get_isize(alpm_pkg_t *pkg)
> > {
> >         ASSERT(pkg != NULL, return -1);
> >         pkg->handle->pm_errno = ALPM_ERR_OK;
> >         return pkg->ops->get_isize(pkg);
> > }
> >
> > So I was trying to follow a standard practice here. Or are you trying
> > to say that lib/libalpm/package.c examples I was looking at are
> > incorrect?
>
> I consider this an unfortunate anti-pattern.  It has bitten us in the
> past by clearing pm_errno during cleanup just like I said.  I haven't
> been motivated enough to go through and remove it so far, but I don't
> want to add any more instances of it.

It would be great to remove this untipattern completely. It will
reduce confusion for the project contributors.

>
> > > > +     if(pkg->base64_sig) {
> > > > +             size_t sig_len;
> > > > +             int ret = alpm_decode_signature(pkg->base64_sig, sig, &sig_len);
> > > > +             return ret == 0 ? (int)sig_len : -1;
> > > > +     } else {
> > > > +             char *pkgpath, *sigpath;
> > > > +             int len;
> > > > +
> > > > +             pkgpath = _alpm_filecache_find(pkg->handle, pkg->filename);
> > > > +             if(!pkgpath) {
> > > > +                     RET_ERR(pkg->handle, ALPM_ERR_PKG_NOT_FOUND, -1);
> > > > +             }
> > > > +             sigpath = _alpm_sigpath(pkg->handle, pkgpath);
> > > > +             if(!sigpath || _alpm_access(pkg->handle, NULL, sigpath, R_OK)) {
> > > > +                     FREE(pkgpath);
> > > > +                     FREE(sigpath);
> > > > +                     RET_ERR(pkg->handle, ALPM_ERR_SIG_MISSING, -1);
> > > > +             }
> > > > +             len = _alpm_read_file(sigpath, sig);
> > >
> > > You need to check for and handle failure.
> >
> > Do you mean to handle a file read failure? But both successful and
> > erroneous codepaths are the same - free() the resources and return
> > "len".
>
> What is pm_errno if read_file fails?

Agree. We need to set some ALPM_ERR in this case.

I do not see any IO error related error codes though. For fread()
error I am returning
ALPM_ERR_SYSTEM but maybe we should introduce some ALPM_ERR_IO?

> > > > +             _alpm_log(pkg->handle, ALPM_LOG_DEBUG, "found detached signature %s with size %d\n", sigpath, len);
> > > > +             FREE(pkgpath);
> > > > +             FREE(sigpath);
> > > > +             return len;
> > > > +     }
> > > > +}
> > > > +
> > > >  const char SYMEXPORT *alpm_pkg_get_arch(alpm_pkg_t *pkg)
> > > >  {
> > > >       ASSERT(pkg != NULL, return NULL);
> > > > diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
> > > > index 8c01ad95..0ab0fe26 100644
> > > > --- a/lib/libalpm/sync.c
> > > > +++ b/lib/libalpm/sync.c
> > > > @@ -880,18 +880,17 @@ static int check_keyring(alpm_handle_t *handle)
> > > >               }
> > > >
> > > >               level = alpm_db_get_siglevel(alpm_pkg_get_db(pkg));
> > > > -             if((level & ALPM_SIG_PACKAGE) && pkg->base64_sig) {
> > > > -                     unsigned char *decoded_sigdata = NULL;
> > > > -                     size_t data_len;
> > > > -                     int decode_ret = alpm_decode_signature(pkg->base64_sig,
> > > > -                                     &decoded_sigdata, &data_len);
> > > > -                     if(decode_ret == 0) {
> > > > +             if((level & ALPM_SIG_PACKAGE)) {
> > > > +                     unsigned char *signature = NULL;
> > > > +                     int sig_len = alpm_pkg_get_sig(pkg, &signature);
> > > > +                     if(sig_len > 0) {
> > > >                               alpm_list_t *keys = NULL;
> > > > -                             if(alpm_extract_keyid(handle, pkg->name, decoded_sigdata,
> > > > -                                                     data_len, &keys) == 0) {
> > > > +                             if(alpm_extract_keyid(handle, pkg->name, signature,
> > > > +                                                     sig_len, &keys) == 0) {
> > > >                                       alpm_list_t *k;
> > > >                                       for(k = keys; k; k = k->next) {
> > > >                                               char *key = k->data;
> > > > +                                             _alpm_log(handle, ALPM_LOG_DEBUG, "found signature key: %s\n", key);
> > > >                                               if(!alpm_list_find(errors, key, key_cmp) &&
> > > >                                                               _alpm_key_in_keychain(handle, key) == 0) {
> > > >                                                       keyinfo = malloc(sizeof(struct keyinfo_t));
> > > > @@ -905,8 +904,8 @@ static int check_keyring(alpm_handle_t *handle)
> > > >                                       }
> > > >                                       FREELIST(keys);
> > > >                               }
> > > > -                             free(decoded_sigdata);
> > > >                       }
> > > > +                     free(signature);
> > > >               }
> > > >       }
> > > >
> > > > diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c
> > > > index 76728eb4..3d57817b 100644
> > > > --- a/lib/libalpm/util.c
> > > > +++ b/lib/libalpm/util.c
> > > > @@ -1489,3 +1489,38 @@ void _alpm_alloc_fail(size_t size)
> > > >  {
> > > >       fprintf(stderr, "alloc failure: could not allocate %zu bytes\n", size);
> > > >  }
> > > > +
> > > > +/** This functions reads file content.
> > > > + *
> > > > + * Memory buffer is allocated by the callee function. It is responsibility
> > > > + * of the caller to free the buffer
> > > > + *
> > > > + * @param filepath filepath to read
> > > > + * @param data pointer to output buffer
> > > > + * @return size of the data read or negative number in case of error
> > > > + */
> > > > +int _alpm_read_file(const char *filepath, unsigned char **data)
> > >
> > > Same as above.
> > >
> > > > +{
> > > > +     struct stat st;
> > > > +     FILE *fp;
> > > > +
> > > > +     if((fp = fopen(filepath, "rb")) == NULL) {
> > > > +             return -1;
> > > > +     }
> > > > +
> > > > +     if(fstat(fileno(fp), &st) != 0) {
> > > > +             fclose(fp);
> > > > +             return -1;
> > > > +     }
> > > > +
> > > > +     MALLOC(*data, st.st_size, fclose(fp); return -1);
> > > > +
> > > > +     if(fread(*data, st.st_size, 1, fp) != 1) {
> > > > +             FREE(*data);
> > > > +             fclose(fp);
> > > > +             return -1;
> > > > +     }
> > > > +
> > > > +     fclose(fp);
> > > > +     return st.st_size;
> > > > +}
> > > > diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h
> > > > index 4fc6e718..50a94489 100644
> > > > --- a/lib/libalpm/util.h
> > > > +++ b/lib/libalpm/util.h
> > > > @@ -155,6 +155,7 @@ int _alpm_fnmatch_patterns(alpm_list_t *patterns, const char *string);
> > > >  int _alpm_fnmatch(const void *pattern, const void *string);
> > > >  void *_alpm_realloc(void **data, size_t *current, const size_t required);
> > > >  void *_alpm_greedy_grow(void **data, size_t *current, const size_t required);
> > > > +int _alpm_read_file(const char *filepath, unsigned char **data);
> > > >
> > > >  #ifndef HAVE_STRSEP
> > > >  char *strsep(char **, const char *);
> > > > --
> > > > 2.26.2
>
> --
> apg

Patch

diff --git a/lib/libalpm/alpm.h b/lib/libalpm/alpm.h
index c6a13273..9c5fff73 100644
--- a/lib/libalpm/alpm.h
+++ b/lib/libalpm/alpm.h
@@ -1403,6 +1403,16 @@  alpm_db_t *alpm_pkg_get_db(alpm_pkg_t *pkg);
  */
 const char *alpm_pkg_get_base64_sig(alpm_pkg_t *pkg);
 
+/** Extracts package signature either from embedded package signature
+ * or if it is absent then reads data from detached signature file.
+ * @param pkg a pointer to package
+ * @param sig output parameter for signature data. Callee function allocates
+ * buffer needed for the signature data. Caller is responsible for
+ * freeing this buffer.
+ * @return size of a signature or negative number if error.
+ */
+int alpm_pkg_get_sig(alpm_pkg_t *pkg, unsigned char **sig);
+
 /** Returns the method used to validate a package during install.
  * @param pkg a pointer to package
  * @return an enum member giving the validation method
diff --git a/lib/libalpm/package.c b/lib/libalpm/package.c
index 5c5fa073..e0e4d987 100644
--- a/lib/libalpm/package.c
+++ b/lib/libalpm/package.c
@@ -268,6 +268,37 @@  const char SYMEXPORT *alpm_pkg_get_base64_sig(alpm_pkg_t *pkg)
 	return pkg->base64_sig;
 }
 
+int SYMEXPORT alpm_pkg_get_sig(alpm_pkg_t *pkg, unsigned char **sig)
+{
+	ASSERT(pkg != NULL, return -1);
+	pkg->handle->pm_errno = ALPM_ERR_OK;
+
+	if(pkg->base64_sig) {
+		size_t sig_len;
+		int ret = alpm_decode_signature(pkg->base64_sig, sig, &sig_len);
+		return ret == 0 ? (int)sig_len : -1;
+	} else {
+		char *pkgpath, *sigpath;
+		int len;
+
+		pkgpath = _alpm_filecache_find(pkg->handle, pkg->filename);
+		if(!pkgpath) {
+			RET_ERR(pkg->handle, ALPM_ERR_PKG_NOT_FOUND, -1);
+		}
+		sigpath = _alpm_sigpath(pkg->handle, pkgpath);
+		if(!sigpath || _alpm_access(pkg->handle, NULL, sigpath, R_OK)) {
+			FREE(pkgpath);
+			FREE(sigpath);
+			RET_ERR(pkg->handle, ALPM_ERR_SIG_MISSING, -1);
+		}
+		len = _alpm_read_file(sigpath, sig);
+		_alpm_log(pkg->handle, ALPM_LOG_DEBUG, "found detached signature %s with size %d\n", sigpath, len);
+		FREE(pkgpath);
+		FREE(sigpath);
+		return len;
+	}
+}
+
 const char SYMEXPORT *alpm_pkg_get_arch(alpm_pkg_t *pkg)
 {
 	ASSERT(pkg != NULL, return NULL);
diff --git a/lib/libalpm/sync.c b/lib/libalpm/sync.c
index 8c01ad95..0ab0fe26 100644
--- a/lib/libalpm/sync.c
+++ b/lib/libalpm/sync.c
@@ -880,18 +880,17 @@  static int check_keyring(alpm_handle_t *handle)
 		}
 
 		level = alpm_db_get_siglevel(alpm_pkg_get_db(pkg));
-		if((level & ALPM_SIG_PACKAGE) && pkg->base64_sig) {
-			unsigned char *decoded_sigdata = NULL;
-			size_t data_len;
-			int decode_ret = alpm_decode_signature(pkg->base64_sig,
-					&decoded_sigdata, &data_len);
-			if(decode_ret == 0) {
+		if((level & ALPM_SIG_PACKAGE)) {
+			unsigned char *signature = NULL;
+			int sig_len = alpm_pkg_get_sig(pkg, &signature);
+			if(sig_len > 0) {
 				alpm_list_t *keys = NULL;
-				if(alpm_extract_keyid(handle, pkg->name, decoded_sigdata,
-							data_len, &keys) == 0) {
+				if(alpm_extract_keyid(handle, pkg->name, signature,
+							sig_len, &keys) == 0) {
 					alpm_list_t *k;
 					for(k = keys; k; k = k->next) {
 						char *key = k->data;
+						_alpm_log(handle, ALPM_LOG_DEBUG, "found signature key: %s\n", key);
 						if(!alpm_list_find(errors, key, key_cmp) &&
 								_alpm_key_in_keychain(handle, key) == 0) {
 							keyinfo = malloc(sizeof(struct keyinfo_t));
@@ -905,8 +904,8 @@  static int check_keyring(alpm_handle_t *handle)
 					}
 					FREELIST(keys);
 				}
-				free(decoded_sigdata);
 			}
+			free(signature);
 		}
 	}
 
diff --git a/lib/libalpm/util.c b/lib/libalpm/util.c
index 76728eb4..3d57817b 100644
--- a/lib/libalpm/util.c
+++ b/lib/libalpm/util.c
@@ -1489,3 +1489,38 @@  void _alpm_alloc_fail(size_t size)
 {
 	fprintf(stderr, "alloc failure: could not allocate %zu bytes\n", size);
 }
+
+/** This functions reads file content.
+ *
+ * Memory buffer is allocated by the callee function. It is responsibility
+ * of the caller to free the buffer
+ *
+ * @param filepath filepath to read
+ * @param data pointer to output buffer
+ * @return size of the data read or negative number in case of error
+ */
+int _alpm_read_file(const char *filepath, unsigned char **data)
+{
+	struct stat st;
+	FILE *fp;
+
+	if((fp = fopen(filepath, "rb")) == NULL) {
+		return -1;
+	}
+
+	if(fstat(fileno(fp), &st) != 0) {
+		fclose(fp);
+		return -1;
+	}
+
+	MALLOC(*data, st.st_size, fclose(fp); return -1);
+
+	if(fread(*data, st.st_size, 1, fp) != 1) {
+		FREE(*data);
+		fclose(fp);
+		return -1;
+	}
+
+	fclose(fp);
+	return st.st_size;
+}
diff --git a/lib/libalpm/util.h b/lib/libalpm/util.h
index 4fc6e718..50a94489 100644
--- a/lib/libalpm/util.h
+++ b/lib/libalpm/util.h
@@ -155,6 +155,7 @@  int _alpm_fnmatch_patterns(alpm_list_t *patterns, const char *string);
 int _alpm_fnmatch(const void *pattern, const void *string);
 void *_alpm_realloc(void **data, size_t *current, const size_t required);
 void *_alpm_greedy_grow(void **data, size_t *current, const size_t required);
+int _alpm_read_file(const char *filepath, unsigned char **data);
 
 #ifndef HAVE_STRSEP
 char *strsep(char **, const char *);