[v2] paccache: add --age-atime and --age-mtime options
diff mbox

Message ID 20180923203606.17704-1-wisp3rwind@posteo.eu
State Accepted
Headers show

Commit Message

wisp3rwind Sept. 23, 2018, 8:36 p.m. UTC
---
Hi,

> So I would overall be okay with adding something like this, but there 
> are some changes I would want to have made first.  First of all, is 
> there any specific case where you would need both supported?  Because 
> only having mtime support sounds like it should be good enough?

In fact, my use case would only involve atime. mtime is probably not a
very useful value, since it will usually be the date the package was
built. The timestamp I'm actually interested in is installation time.
Even for a `noatime`-mounted drive (where atime will be the download
time), atime will be close to when the package was first installed.

I agree that `--min-{a,m}time` is much more descriptive, changed that.

@Eli: Thanks for the pointers to `touch`/`find`. I dropped my own parser in
favor of invoking `date` (which uses the same parser as `touch`).
However, because I want to combine the retention according to `--keep`
and `--min-{a,m}time`, candidate selection cannot be done directly by
`find`, but still needs to be done manually (in the awk script).

Also, not creating a new `stat` process for every single file appears to
improve the performance of this feature quite a lot (a subjective impression;
I did not benchmark it).

 doc/paccache.8.txt |  5 ++++
 src/paccache.sh.in | 61 ++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 59 insertions(+), 7 deletions(-)

Patch
diff mbox

diff --git a/doc/paccache.8.txt b/doc/paccache.8.txt
index db81283..196bb49 100644
--- a/doc/paccache.8.txt
+++ b/doc/paccache.8.txt
@@ -38,6 +38,11 @@  Options
 	Scan for packages for a specific architecture. Default is to scan for
 	all architectures.
 
+*\--min-atime <age>*::
+*\--min-mtime <age>*::
+	Only consider packages for removal whose atime or mtime, respectively, is older
+	than specified. The argument is parsed by 'date -d', e.g. '30 days ago'.
+
 *-c, \--cachedir <dir>*::
 	Specify a different cache directory. This option can be used more than once.
 	Default is to use the cache directory configured in 'pacman.conf'.
diff --git a/src/paccache.sh.in b/src/paccache.sh.in
index 012ba9f..70e30e0 100644
--- a/src/paccache.sh.in
+++ b/src/paccache.sh.in
@@ -27,6 +27,7 @@  declare -r myver='@PACKAGE_VERSION@'
 
 declare -a cachedirs=() candidates=() cmdopts=() whitelist=() blacklist=()
 declare -i delete=0 dryrun=0 filecount=0 move=0 needsroot=0 totalsaved=0 verbose=0
+declare -i min_atime=0 min_mtime=0
 declare    delim=$'\n' keep=3 movedir= scanarch=
 
 QUIET=0
@@ -45,13 +46,23 @@  pkgfilter() {
 	# there's whitelist and blacklist parameters passed to this
 	# script after the block of awk.
 
-	awk -v keep="$1" -v scanarch="$2" '
+	awk -v keep="$1" -v scanarch="$2" -v min_atime="$3" -v min_mtime="$4" '
 	function basename(str) {
 		sub(".*/", "", str);
 		return str;
 	}
 
-	function parse_filename(filename,     parts, count, i, pkgname, arch) {
+	function parse_filename(filename,
+	                        atime, mtime, parts, count, i, pkgname, arch) {
+
+		if (0 + min_atime + min_mtime != 0) {
+			# atime and mtime are in the first two columns and the
+			# separator is a single space
+			split(filename, parts, " ")
+			atime = parts[1]
+			mtime = parts[2]
+			filename = substr(filename, length(atime) + length(mtime) + 3)
+		}
 
 		count = split(basename(filename), parts, "-")
 
@@ -69,8 +80,12 @@  pkgfilter() {
 
 		if ("" == packages[pkgname,arch]) {
 			packages[pkgname,arch] = filename
+			atimes[pkgname,arch] = atime
+			mtimes[pkgname,arch] = mtime
 		} else {
 			packages[pkgname,arch] = packages[pkgname,arch] SUBSEP filename
+			atimes[pkgname,arch] = atimes[pkgname,arch] SUBSEP atime
+			mtimes[pkgname,arch] = mtimes[pkgname,arch] SUBSEP mtime
 		}
 	}
 
@@ -101,12 +116,19 @@  pkgfilter() {
 			# enforce architecture match if specified
 			if (!scanarch || scanarch == idx[2]) {
 				count = split(packages[idx[1], idx[2]], pkgs, SUBSEP)
+				split(atimes[idx[1], idx[2]], atime, SUBSEP)
+				split(mtimes[idx[1], idx[2]], mtime, SUBSEP)
 				for(i = 1; i <= count - keep; i++) {
-					print pkgs[i]
+					# If checking file age, potentially keep more candidates
+					if ((0 + min_atime == 0 || (strtonum(atime[i]) < 0 + min_atime)) &&
+					    (0 + min_mtime == 0 || (strtonum(mtime[i]) < 0 + min_mtime)) \
+					    ) {
+						print pkgs[i]
+					}
 				}
 			}
 		}
-	}' "${@:3}"
+	}' "${@:5}"
 }
 
 m4_include(../lib/size_to_human.sh)
@@ -174,6 +196,12 @@  Usage: ${myname} <operation> [options] [targets...]
     -r, --remove          remove candidate packages.
 
   Options:
+    --min-atime <time>
+    --min-mtime <time>    keep packages with an atime/mtime that is not older
+						  than the time given, even if this means keeping more
+						  than specified through the '--keep' option. Accepts
+						  arguments according to 'info "Date input formats"',
+						  e.g. '30 days ago'.
     -a, --arch <arch>     scan for "arch" (default: all architectures).
     -c, --cachedir <dir>  scan "dir" for packages. can be used more than once.
                           (default: read from @sysconfdir@/pacman.conf).
@@ -200,7 +228,8 @@  version() {
 
 OPT_SHORT=':a:c:dfhi:k:m:qrsuVvz'
 OPT_LONG=('arch:' 'cachedir:' 'dryrun' 'force' 'help' 'ignore:' 'keep:' 'move'
-          'nocolor' 'quiet' 'remove' 'uninstalled' 'version' 'verbose' 'null')
+          'nocolor' 'quiet' 'remove' 'uninstalled' 'version' 'verbose' 'null'
+          'min-atime:' 'min-mtime:')
 
 if ! parseopts "$OPT_SHORT" "${OPT_LONG[@]}" -- "$@"; then
 	exit 1
@@ -210,6 +239,18 @@  unset OPT_SHORT OPT_LONG OPTRET
 
 while :; do
 	case $1 in
+		--min-atime)
+			min_atime=$(date -d "$2" +%s)
+			if (( $? )); then
+				die "argument to option --min-atime must be of the form described by 'info \"Date input formats\" '."
+			fi
+			shift ;;
+		--min-mtime)
+			min_mtime=$(date -d "$2" +%s)
+			if (( $? )); then
+				die "argument to option --min-mtime must be of the form described by 'info \"Date input formats\" '."
+			fi
+			shift ;;
 		-a|--arch)
 			scanarch=$2
 			shift ;;
@@ -308,8 +349,14 @@  for cachedir in "${cachedirs[@]}"; do
 	# note that these results are returned in an arbitrary order from awk, but
 	# they'll be resorted (in summarize) iff we have a verbosity level set.
 	IFS=$'\n' read -r -d '' -a cand < \
-		<(printf '%s\n' "$PWD"/*.pkg.tar!(*.sig) | pacsort --files |
-			pkgfilter "$keep" "$scanarch" \
+		<(	if (( min_atime || min_mtime )); then
+				find "$PWD" -name '*.pkg.tar*.sig' -prune -o \( -name '*.pkg.tar*' -printf '%A@ %T@ %p\n' \) |
+				pacsort --files --key 3 --separator ' '
+			else
+				printf '%s\n' "$PWD"/*.pkg.tar!(*.sig) |
+				pacsort --files
+			fi |
+			pkgfilter "$keep" "$scanarch" "$min_atime" "$min_mtime" \
 				"${#whitelist[*]}" "${whitelist[@]}" \
 				"${#blacklist[*]}" "${blacklist[@]}")