[pacman-dev] makepkg: do not count hard links multiple times when calculating pkg size

Message ID 20191027074146.1833127-1-eschwartz@archlinux.org
State Accepted, archived
Headers show
Series [pacman-dev] makepkg: do not count hard links multiple times when calculating pkg size | expand

Commit Message

Eli Schwartz Oct. 27, 2019, 7:41 a.m. UTC
Exclude files with hardlinks when cat'ing all the files, and do a second
run to look at each file with hardlinks, keep track of the ones we've
already operated on, and only cat each inode once. Then use "wc -c" to get
the size of all (deduplicated) files the same way we were already doing.

Original-patch-by: Ronan Pigott <rpigott@berkeley.edu>
Signed-off-by: Eli Schwartz <eschwartz@archlinux.org>
---

 scripts/Makefile.am                   |  1 +
 scripts/libmakepkg/util/dirsize.sh.in | 41 +++++++++++++++++++++++++++
 scripts/libmakepkg/util/meson.build   |  1 +
 scripts/makepkg.sh.in                 |  2 +-
 4 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 scripts/libmakepkg/util/dirsize.sh.in

Patch

diff --git a/scripts/Makefile.am b/scripts/Makefile.am
index 88e9612d..63d09767 100644
--- a/scripts/Makefile.am
+++ b/scripts/Makefile.am
@@ -127,6 +127,7 @@  LIBMAKEPKG_IN = \
 	libmakepkg/util.sh \
 	libmakepkg/util/compress.sh \
 	libmakepkg/util/config.sh \
+	libmakepkg/util/dirsize.sh \
 	libmakepkg/util/error.sh \
 	libmakepkg/util/message.sh \
 	libmakepkg/util/option.sh \
diff --git a/scripts/libmakepkg/util/dirsize.sh.in b/scripts/libmakepkg/util/dirsize.sh.in
new file mode 100644
index 00000000..fce9f5b5
--- /dev/null
+++ b/scripts/libmakepkg/util/dirsize.sh.in
@@ -0,0 +1,41 @@ 
+#!/usr/bin/bash
+#
+#   dirsize.sh - calculate size of all files in a directory
+#
+#   Copyright (c) 2019 Pacman Development Team <pacman-dev@archlinux.org>
+#
+#   This program is free software; you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+[[ -n "$LIBMAKEPKG_UTIL_DIRSIZE_SH" ]] && return
+LIBMAKEPKG_UTIL_DIRSIZE_SH=1
+
+
+# find the total filesize of all files in the current directory while only
+# counting multiply hardlinked files once
+dirsize() {  # prints (via wc -c) the combined byte count of regular files under the current directory
+	local file inode
+	declare -A files  # inode values already streamed, so each hardlinked inode is cat'ed only once
+
+	{
+		find . -type f -links 1 -exec cat {} + 2>/dev/null  # files with exactly one link: stream their contents directly
+		while read -rd ' ' inode; do  # text up to the first space of each record is taken as the inode
+			IFS= read -r file  # remainder of the line is the path; IFS= keeps leading/trailing whitespace
+			if [[ -z ${files[$inode]} ]]; then  # true only the first time this inode value is seen
+				files[$inode]=found
+				cat "$file"  # NOTE(review): a path containing a newline would be split across reads - confirm acceptable
+			fi
+		done < <(find . -type f -links +1 -exec @INODECMD@ {} + 2>/dev/null)  # @INODECMD@ is a build-time placeholder; presumably prints "<inode> <path>" per line - confirm
+	} | wc -c  # counts every byte emitted by both passes above
+}
diff --git a/scripts/libmakepkg/util/meson.build b/scripts/libmakepkg/util/meson.build
index c29503b7..6160d87e 100644
--- a/scripts/libmakepkg/util/meson.build
+++ b/scripts/libmakepkg/util/meson.build
@@ -3,6 +3,7 @@  libmakepkg_module = 'util'
 sources = [
   'compress.sh.in',
   'config.sh.in',
+  'dirsize.sh.in',
   'error.sh.in',
   'message.sh.in',
   'option.sh.in',
diff --git a/scripts/makepkg.sh.in b/scripts/makepkg.sh.in
index 947a30c5..d217d0a4 100644
--- a/scripts/makepkg.sh.in
+++ b/scripts/makepkg.sh.in
@@ -584,7 +584,7 @@  write_kv_pair() {
 }
 
 write_pkginfo() {
-	local size="$(find . -type f -exec cat {} + 2>/dev/null | wc -c)"
+	local size=$(dirsize)
 
 	merge_arch_attrs