[dbscripts,v2,2/6] db-move, db-remove: Don't parse PKGBUILD files

Message ID 20180622160816.16076-3-lukeshu@lukeshu.com
State New
Headers show
Series
  • Be less tightly coupled with SVN
Related show

Commit Message

Luke Shumaker June 22, 2018, 4:08 p.m. UTC
From: Luke Shumaker <lukeshu@parabola.nu>

Don't try to parse PKGBUILD files from SVN; all of the information we need
is already in the DBEXT files.  Several programs use [[ -f PKGBUILD ]] or
[[ -r PKGBUILD ]] on files from SVN; those checks can stay, just remove all
instances of actually trying to *parse* those files.

As an exception, don't modify parse_pkgbuilds.sh (which is called by
check_packages.py, which is called by cron-jobs/integrity-check).

Most of the attributes we need have been present in the DBEXT files for as
long as `repo-add` has been part of pacman:

    attribute |   git    |  git date  |  ver  |  ver date
    ----------+----------+------------+-------+-----------
    FILENAME  | aa1c0ba9 | 2006-11-20 | 3.0.0 | 2007-03-25
    NAME      | aa1c0ba9 | 2006-11-20 | 3.0.0 | 2007-03-25
    VERSION   | aa1c0ba9 | 2006-11-20 | 3.0.0 | 2007-03-25
    ARCH      | aa1c0ba9 | 2006-11-20 | 3.0.0 | 2007-03-25
    BASE      | 4b21504f | 2009-07-22 | 3.3.0 | 2009-08-02

However, `BASE` for split packages is slightly newer, and its presence
relies not only on the `repo-add` version, but also the `makepkg` version
used to build the package.

As of 2018-06-15, the oldest package in any of the Arch Linux repos is
community/gimp-refocus, built on 2013-07-22; I don't believe it is
necessary to handle packages from before that change was made (before
`BASE` was set).

"Ignore space change" might be useful when viewing this diff.
---
 db-functions | 38 ++++++++++++++++++--
 db-move      | 99 ++++++++++++++++++----------------------------------
 db-remove    | 11 +++---
 3 files changed, 74 insertions(+), 74 deletions(-)

Patch

diff --git a/db-functions b/db-functions
index 0491c22..69f35b4 100644
--- a/db-functions
+++ b/db-functions
@@ -6,9 +6,6 @@ 
 shopt -s extglob globstar nullglob
 
 
-# Some PKGBUILDs need CARCH to be set
-CARCH="x86_64"
-
 # Useful functions
 UMASK=""
 set_umask () {
@@ -294,6 +291,41 @@  getpkgfiles() {
 	echo "${files[@]}"
 }
 
+# usage: getdbinfo repo arch <pkgbase|pkgname> fields
+getdbinfo() {
+	local repo=$1
+	local arch=$2
+	local pkgbase=$3
+	local fields=$4
+
+	local dbfile="${FTP_BASE}/${repo}/os/${arch}/${repo}${DBEXT}"
+	local dbdir="${WORKDIR}/dbfiles/${repo}-${arch}"
+	if ! [[ -d $dbdir ]]; then
+		mkdir -p "$dbdir"
+		bsdtar -xf "$dbfile" -C "$dbdir" --include='*/desc'
+	fi
+
+	# The grep/xargs is just a fast filter with possible
+	# false positives (like a Bloom filter), because awk is
+	# comparatively slow.  You could remove the grep/xargs line,
+	# and append `"$dbdir"/*/desc` to the awk command, and it
+	# would do the same thing; just several times slower.
+	grep -r -lZ -Fx -e "$pkgbase" -- "$dbdir" | xargs -r0 \
+	awk -v pkgbase="$pkgbase" -v fields="$fields"  -vFS='\n' -vRS='' '
+		{ dat[gensub(/^%(.*)%$/, "\\1", 1, $1)] = gensub(FS, ",", "g", gensub($1 FS, "", 1)) }
+		ENDFILE {
+			if (dat["BASE"] == pkgbase || dat["NAME"] == pkgbase) {
+				n=split(fields, fieldlist, ",")
+				str = ""
+				for (i=1; i<=n; i++)
+					str = str (i==1?"":" ") dat[fieldlist[i]]
+				print str
+			}
+			delete dat
+		}
+	'
+}
+
 check_pkgfile() {
 	local pkgfile=$1
 
diff --git a/db-move b/db-move
index 63e5c14..03debfc 100755
--- a/db-move
+++ b/db-move
@@ -28,32 +28,14 @@  done
 arch_svn checkout -q -N "${SVNREPO}" "${WORKDIR}/svn" >/dev/null
 for pkgbase in "${args[@]:2}"; do
 	arch_svn up -q "${WORKDIR}/svn/${pkgbase}" >/dev/null
-	for pkgarch in "${ARCHES[@]}" 'any'; do
-		svnrepo_from="${WORKDIR}/svn/${pkgbase}/repos/${repo_from}-${pkgarch}"
-		if [[ -r ${svnrepo_from}/PKGBUILD ]]; then
-			pkgnames=($(. "${svnrepo_from}/PKGBUILD"; echo "${pkgname[@]}"))
-			if (( ${#pkgnames[@]} < 1 )); then
-				die "Could not read pkgname"
+	for tarch in "${ARCHES[@]}"; do
+		while read -r pkgarch pkgfile; do
+			svnrepo_from="${WORKDIR}/svn/${pkgbase}/repos/${repo_from}-${pkgarch}"
+			if [[ -r ${svnrepo_from}/PKGBUILD ]]; then
+				getpkgfile "${ftppath_from}/${tarch}/${pkgfile}" >/dev/null
+				continue 3
 			fi
-
-			pkgver=$(. "${svnrepo_from}/PKGBUILD"; get_full_version)
-			if [[ -z ${pkgver} ]]; then
-				die "Could not read pkgver"
-			fi
-
-			if [[ "${pkgarch}" = any ]]; then
-				tarches=("${ARCHES[@]}")
-			else
-				tarches=("${pkgarch}")
-			fi
-
-			for pkgname in "${pkgnames[@]}"; do
-				for tarch in "${tarches[@]}"; do
-					getpkgfile "${ftppath_from}/${tarch}/${pkgname}-${pkgver}-${pkgarch}"${PKGEXTS} >/dev/null
-				done
-			done
-			continue 2
-		fi
+		done < <(getdbinfo "$repo_from" "$tarch" "$pkgbase" ARCH,FILENAME)
 	done
 	die "%s not found in %s" "$pkgbase" "$repo_from"
 done
@@ -64,49 +46,36 @@  declare -A add_pkgs
 declare -A remove_pkgs
 for pkgbase in "${args[@]:2}"; do
 	tag_list=""
-	for pkgarch in "${ARCHES[@]}" 'any'; do
-		svnrepo_from="${WORKDIR}/svn/${pkgbase}/repos/${repo_from}-${pkgarch}"
-		svnrepo_to="${WORKDIR}/svn/${pkgbase}/repos/${repo_to}-${pkgarch}"
-
-		if [[ -f ${svnrepo_from}/PKGBUILD ]]; then
-			if [[ ${pkgarch} = any ]]; then
-				tarches=("${ARCHES[@]}")
-			else
-				tarches=("${pkgarch}")
-			fi
-			msg2 "%s (%s)" "$pkgbase" "${tarches[*]}"
-			pkgnames=($(. "${svnrepo_from}/PKGBUILD"; echo "${pkgname[@]}"))
-			pkgver=$(. "${svnrepo_from}/PKGBUILD"; get_full_version)
-
-			if [[ -d ${svnrepo_to} ]]; then
-				for file in $(arch_svn ls "${svnrepo_to}"); do
-					arch_svn rm -q "${svnrepo_to}/$file@"
+	for tarch in "${ARCHES[@]}"; do
+		while read -r pkgname pkgver pkgarch pkgfile; do
+			svnrepo_from="${WORKDIR}/svn/${pkgbase}/repos/${repo_from}-${pkgarch}"
+			svnrepo_to="${WORKDIR}/svn/${pkgbase}/repos/${repo_to}-${pkgarch}"
+			if [[ -f ${svnrepo_from}/PKGBUILD ]]; then
+				msg2 "%s (%s)" "$pkgbase" "$pkgarch"
+
+				if [[ -d ${svnrepo_to} ]]; then
+					for file in $(arch_svn ls "${svnrepo_to}"); do
+						arch_svn rm -q "${svnrepo_to}/$file@"
+					done
+				else
+					mkdir "${svnrepo_to}"
+					arch_svn add -q "${svnrepo_to}"
+				fi
+
+				for file in $(arch_svn ls "${svnrepo_from}"); do
+					arch_svn mv -q -r HEAD "${svnrepo_from}/$file@" "${svnrepo_to}/"
 				done
-			else
-				mkdir "${svnrepo_to}"
-				arch_svn add -q "${svnrepo_to}"
+				arch_svn rm --force -q "${svnrepo_from}"
+				tag_list+=", $pkgarch"
 			fi
 
-			for file in $(arch_svn ls "${svnrepo_from}"); do
-				arch_svn mv -q -r HEAD "${svnrepo_from}/$file@" "${svnrepo_to}/"
-			done
-			arch_svn rm --force -q "${svnrepo_from}"
-			tag_list+=", $pkgarch"
-
-			for pkgname in "${pkgnames[@]}"; do
-				for tarch in "${tarches[@]}"; do
-					pkgpath=$(getpkgfile "${ftppath_from}/${tarch}/${pkgname}-${pkgver}-${pkgarch}"${PKGEXTS})
-					pkgfile="${pkgpath##*/}"
-
-					ln -s "../../../${PKGPOOL}/${pkgfile}" "${ftppath_to}/${tarch}/"
-					if [[ -f ${FTP_BASE}/${PKGPOOL}/${pkgfile}.sig ]]; then
-						ln -s "../../../${PKGPOOL}/${pkgfile}.sig" "${ftppath_to}/${tarch}/"
-					fi
-					add_pkgs[${tarch}]+="${FTP_BASE}/${PKGPOOL}/${pkgfile} "
-					remove_pkgs[${tarch}]+="${pkgname} "
-				done
-			done
-		fi
+			ln -s "../../../${PKGPOOL}/${pkgfile}" "${ftppath_to}/${tarch}/"
+			if [[ -f ${FTP_BASE}/${PKGPOOL}/${pkgfile}.sig ]]; then
+				ln -s "../../../${PKGPOOL}/${pkgfile}.sig" "${ftppath_to}/${tarch}/"
+			fi
+			add_pkgs[${tarch}]+="${FTP_BASE}/${PKGPOOL}/${pkgfile} "
+			remove_pkgs[${tarch}]+="${pkgname} "
+		done < <(getdbinfo "$repo_from" "$tarch" "$pkgbase" NAME,VERSION,ARCH,FILENAME)
 	done
 	tag_list="${tag_list#, }"
 	arch_svn commit -q "${WORKDIR}/svn/${pkgbase}" -m "${0##*/}: moved ${pkgbase} from [${repo_from}] to [${repo_to}] (${tag_list})"
diff --git a/db-remove b/db-remove
index ac9a168..3017026 100755
--- a/db-remove
+++ b/db-remove
@@ -32,17 +32,16 @@  done
 remove_pkgs=()
 for pkgbase in "${pkgbases[@]}"; do
 	msg "Removing %s from [%s]..." "$pkgbase" "$repo"
-	arch_svn checkout -q "${SVNREPO}/${pkgbase}" "${WORKDIR}/svn/${pkgbase}" >/dev/null
 
+	mapfile -t pkgnames < <(getdbinfo "$repo" "${tarches[0]}" "$pkgbase" NAME)
+	remove_pkgs+=("${pkgnames[@]}")
+
+	arch_svn checkout -q "${SVNREPO}/${pkgbase}" "${WORKDIR}/svn/${pkgbase}" >/dev/null
 	if [[ -d ${WORKDIR}/svn/$pkgbase/repos/$svnrepo ]]; then
-		remove_pkgs+=($(. "${WORKDIR}/svn/$pkgbase/repos/$svnrepo/PKGBUILD"; echo ${pkgname[@]}))
 		arch_svn rm --force -q "${WORKDIR}/svn/$pkgbase/repos/$svnrepo"
 		arch_svn commit -q "${WORKDIR}/svn/$pkgbase" -m "${0##*/}: $pkgbase removed by $(id -un)"
 	else
-		warning "%s not found in %s" "$pkgbase" "$svnrepo"
-		warning "Removing only %s from the repo" "$pkgbase"
-		warning "If it was a split package you have to remove the others yourself!"
-		remove_pkgs+=("$pkgbase")
+		warning "pkgbase '%s' not found in svn; unable to commit removal to svn" "$pkgbase"
 	fi
 done