#!/bin/dash
# fatdog-merge-layers.sh - merge one tmpfs layer to another, deleting the original
# Copyright (C) James Budiono 2011, 2012, 2013, 2016, 2018, 2020
# License: GNU GPL Version 3 or later
#
# 2018-11-01: Re-factor, remove dead code, add exclusion for both removal and merge
# 2020-03-12: Add fix_bottom_layer, clean_non_sfs_whiteouts, remove bashism
#
# Note: supports aufs only
# Note: (obviously) assumes that both layers are r/w
#
# Call: $1 = merge-from layer, $2 = merge-to layer
#

### Initial setup
# The two layers are taken from the positional arguments.
# Note: DON'T use trailing slash here
TMPFS="$1"			# layer to merge from (was /initrd/pup_rw)
PUPSAVE="$2"		# layer to merge into (was /initrd/pup_ro1)

# Only merge when both layers were given AND exist as directories.
# Separate tests replace the obsolescent "-a" operator, which is ambiguous
# when an operand looks like a test operator. The previous bare "exit"
# returned status 0 here, so that status is kept for existing callers.
if [ -z "$TMPFS" ] || [ -z "$PUPSAVE" ] || [ ! -d "$TMPFS" ] || [ ! -d "$PUPSAVE" ]; then
	exit 0
fi

### Configuration
# C locale for everything run below (the original note gives speed as the reason)
LANG=C
# flag file: merge_layers creates it when the save layer looks full,
# remove_merged_files and warn_disk_full check for it
DISKFULL=/tmp/diskfull
# list of merged paths: written by merge_layers, read by list_closed_files
MERGED_FILES=/tmp/snapmergepuppy.files
# optional settings file, sourced when present
# (presumably where CLEAR_NON_SFS_WHITEOUT comes from - confirm)
EVENTMANAGER_CONFIG=/etc/eventmanager
if [ -e "$EVENTMANAGER_CONFIG" ]; then
	. "$EVENTMANAGER_CONFIG"
fi

# No-remove regex - for closed files.
# An ERE alternation; list_closed_files skips every path that matches it, so
# such files are never deleted from the top layer. Built one alternative per
# line; the pieces in single quotes are literal regex text.
NOREMOVE_LIST=".wh..wh.orph|.wh..wh.plnk|.wh..wh.aufs"
NOREMOVE_LIST="$NOREMOVE_LIST|$TMPFS/var/run"
NOREMOVE_LIST="$NOREMOVE_LIST|$TMPFS"'/etc/passwd-?$'
NOREMOVE_LIST="$NOREMOVE_LIST|$TMPFS"'/etc/shadow-?$'
NOREMOVE_LIST="$NOREMOVE_LIST|$TMPFS"'/etc/group-?$'
NOREMOVE_LIST="$NOREMOVE_LIST|$TMPFS"'/etc/gshadow-?$'
NOREMOVE_LIST="$NOREMOVE_LIST"'|/([^.][^w][^h][^.][^/]*|...|..|.)[.]sock$'
NOREMOVE_LIST="$NOREMOVE_LIST"'|/socket$'
NOREMOVE_LIST="$NOREMOVE_LIST"'|/lxqt.conf$'

# No-merge list - for rsync.
# Written as "|pattern" entries; every "|" is turned into " --exclude=" below
# and merge_layers passes the result to rsync unquoted. No pattern is active
# at present (a disabled example is /var/db/fontconfig/), so the value ends up
# as a single " --exclude=" with an empty pattern.
NOMERGE_LIST="|"
NOMERGE_LIST="$(printf '%s\n' "$NOMERGE_LIST" | sed -e 's/|/ --exclude=/g')"
# (bash could do this faster with ${NOMERGE_LIST//|/ --exclude=}, but that is a bashism)


### helpers

# Print the list of all merged, currently closed files in TMPFS.
# Globals: TMPFS        - top layer root
#          NOREMOVE_LIST - ERE; matching paths are never listed
#          MERGED_FILES - file holding the merged paths, one per line
# Output:  the selected paths on stdout, each followed by a NUL byte
#          (meant for "xargs -0")
list_closed_files() {
	# awk is lot faster than a shell loop.
	# "$TMPFS" has to be quoted: unquoted, a path containing whitespace is
	# split and awk takes the second half as its program text.
	awk -v tmpfs="$TMPFS" -v ignore_list="$NOREMOVE_LIST" -v merged_files="$MERGED_FILES" '
BEGIN {
	# get the list of open files before processing - this replaces lsof
	# (targets of all /proc/<pid>/fd/* links, minus sockets, pipes and
	# anon inodes)
	OPENFILES = "find /proc -type l -wholename \"/proc/*/fd/*\" 2>/dev/null " \
	            "| xargs readlink 2>/dev/null "                               \
	            "| grep -vE \"^socket:|^pipe:|^anon_inode:\" "                \
	            "| sort -u "
	while (OPENFILES | getline) {
		# prefix with the top-layer root so the key has the same form as
		# the paths find reports below
		openfiles[tmpfs $0]=1
	}
	close(OPENFILES)

	# now compare this with the non-directories in the top layer, keeping
	# only those that appear (as whole lines) in the merged-files list
	ALLFILES = "find \"" tmpfs "\" -not -type d | grep -Fxf \"" merged_files "\" "
	while (ALLFILES | getline) {
		if ($0 ~ ignore_list) continue;
		if (openfiles[$0] != 1)
			printf("%s\0",$0);
	}
	close(ALLFILES)
}'
}

# Check that diropq is working (it sometimes doesn't).
# Test method: create a probe file in the pupsave directory and see whether
# it is visible in the same directory seen from above.
# $1 = dir base (rootfs)
# $2 = pupsave dir base
# Returns: 0 when the probe is NOT visible through $1 (diropq works), and also
#            when $2 is not a real directory or the probe cannot be created;
#          1 when the probe is visible through $1 (diropq does not work).
test_diropq() {
	local VISIBLE=0
	local TMPFILE

	if [ -d "$2" ] && [ ! -L "$2" ] && TMPFILE=$(mktemp "$2"/diropqtest.XXXXXXXXX); then
		[ -e "$1/${TMPFILE##*/}" ] && VISIBLE=1
		# quoted on purpose: with an unquoted path containing whitespace
		# rm would miss the probe and leave it behind in the save layer
		rm -f -- "$TMPFILE"
	fi
	return $VISIBLE
}


### actual snapmerge functions

# Clean stale whiteouts and make aufs self-consistent
# (inconsistent aufs leads to crash).
# Globals: TMPFS (top layer root), PUPSAVE (bottom layer root)
# Pass 1: for every whiteout ".wh.NAME" in TMPFS, delete NAME from PUPSAVE.
# Pass 2: for every whiteout ".wh.NAME" in PUPSAVE, delete the whiteout when
#         NAME now exists in TMPFS.
# Limitation: paths are processed line by line, so names containing a
# newline are not handled.
clean_whiteouts() {
	local wh rel dir leaf target

	# check for new whiteouts - if yes remove files from pupsave
	echo "removing bottom-layer files which have been deleted from top-layer"
	find "$TMPFS" -mount \( -regex '.*/\.wh\.[^/]*' -type f \) |
	grep -v -E ".wh..wh.orph|.wh..wh.plnk|.wh..wh.aufs" |
	# IFS= keeps leading/trailing blanks: without it the whiteout of "foo "
	# would be read as the whiteout of "foo" and delete the wrong file
	while IFS= read -r wh; do
		# the prefix is quoted so it is stripped literally, not as a glob
		rel="${wh#"$TMPFS"}"		# path of the whiteout inside the layer
		dir="${rel%/*}"
		leaf="${rel##*/}"

		# ${leaf#.wh.} is the name the whiteout hides
		target="${PUPSAVE}${dir}/${leaf#.wh.}"
		rm -rf -- "$target"		# delete the file/dir if it's there

		# if this is a dir-opaque file, we need to remove the dir in pupsave
		# but only if the dir-opaque is currently working !!
		if [ "$leaf" = ".wh..wh..opq" ] && test_diropq "$dir" "${PUPSAVE}${dir}"; then
			rm -rf -- "${PUPSAVE}${dir}"
		fi
	done

	# check for old whiteouts - remove them from pupsave if new files created in tmpfs
	echo "removing bottom-layer whiteouts for newly added top-layer files"
	find "$PUPSAVE" -mount \( -regex '.*/\.wh\.[^/]*' -type f \) |
	grep -v -E ".wh..wh.orph|.wh..wh.plnk|.wh..wh.aufs|.wh..wh..opq" |
	while IFS= read -r wh; do
		rel="${wh#"$PUPSAVE"}"
		dir="${rel%/*}"
		leaf="${rel##*/}"
		target="${TMPFS}${dir}/${leaf#.wh.}"

		# delete whiteout only if a new file/dir has been created in the tmpfs layer
		# (-L also catches dangling symlinks, which -e reports as missing)
		if [ -e "$target" ] || [ -L "$target" ]; then
			rm -f -- "$wh"
		fi
	done
}

# Merge the top layer into the bottom layer using rsync.
# Globals: TMPFS, PUPSAVE (layer roots), NOMERGE_LIST (extra rsync options),
#          MERGED_FILES (written: paths that were merged and are not brand
#          new), DISKFULL (flag file, created when the merge failed and the
#          bottom layer shows no free space)
merge_layers() {
	local NEWEST_FILES RESULT
	# by now we should be consistent - so rsync everything
	echo "merging top-layer files to bottom-layer"
	rm -rf "$DISKFULL"

	# find files newer than 1 second ago; they may still be changing, so
	# they are taken out of the merged list further down
	# ("$TMPFS" is quoted so that a path with whitespace is one argument)
	NEWEST_FILES="$(find "$TMPFS" -mount -not -type d -newermt '-1 second')"

	# $NOMERGE_LIST is left unquoted on purpose: it holds separate rsync options.
	# --out-format logs one path per transferred item into $MERGED_FILES; the
	# leading "/" is there so entries compare equal to find's absolute paths
	# (NOTE(review): relies on rsync printing %f without a leading slash - confirm)
	if ! LANG=en_US.UTF-8 rsync $NOMERGE_LIST --out-format='/%f' -aXH -I --force "$TMPFS"/ "$PUPSAVE" > "$MERGED_FILES"; then
		# rsync failed: flag "disk full" when df shows 0 in its 4th column
		# for the bottom layer, or shows no line for it at all
		case $(df -a | grep "$PUPSAVE" | awk '{print $4}') in
			0|"") touch "$DISKFULL" ;;
		esac
	fi

	# remove files newer than 1 second ago from $MERGED_FILES, to not delete them from tmpfs
	RESULT="$(printf '%s\n' "$NEWEST_FILES" | grep -v -Fxf- "$MERGED_FILES")"
	# printf instead of echo: dash's echo would interpret backslashes in names
	printf '%s\n' "$RESULT" > "$MERGED_FILES"
}

# Because the process is not atomic, it is possible that both a whiteout and
# the file it hides end up in the bottom layer after the merge. Fix it:
# - if both whiteout and file exist, then delete whichever is older
#   (compared by "stat -c %Z" of each)
# - if they are of equal age, then delete whiteout (err on side of caution)
# Globals: PUPSAVE (bottom layer root)
fix_bottom_layer() {
	local wh rel dir leaf target

	echo "fix bottom-layer inconsistency"
	find "$PUPSAVE" -mount \( -regex '.*/\.wh\.[^/]*' -type f \) |
	grep -v -E ".wh..wh.orph|.wh..wh.plnk|.wh..wh.aufs|.wh..wh..opq" |
	# IFS= keeps trailing blanks in names; the prefix below is quoted so it
	# is stripped literally rather than as a glob pattern
	while IFS= read -r wh; do
		rel="${wh#"$PUPSAVE"}"
		dir="${rel%/*}"
		leaf="${rel##*/}"
		target="${PUPSAVE}${dir}/${leaf#.wh.}"	# what the whiteout hides

		# nothing to fix unless the whiteout and its target both exist
		[ -e "$target" ] || [ -L "$target" ] || continue

		# Strictly newer (-gt, not -ge): on a tie the file is kept and the
		# whiteout goes, as the rule above says. If stat fails the test
		# errors out and the whiteout branch is taken.
		if [ "$(stat -c "%Z" "$wh")" -gt "$(stat -c "%Z" "$target")" ]; then
			# whiteout newer - delete the file.
			# No dir-opaque handling here: .wh..wh..opq entries are
			# already filtered out by the grep above.
			rm -rf -- "$target"
		else
			# else delete the whiteout.
			# if the target is a dir, add diropq when removing its whiteout;
			# this is just in case and it won't work until next reboot anyway
			[ -d "$target" ] && touch "$target/.wh..wh..opq"
			rm -f -- "$wh"
		fi
	done
}

# Remove whiteouts not covering anything in existing SFS files.
# Globals: PUPSAVE (bottom layer root)
# The SFS mount points are the /aufs/kernel-modules, /aufs/pup_init and
# /aufs/pup_ro[N] strings found in /proc/mounts. A whiteout ".wh.NAME" is
# kept only if NAME exists under at least one of them.
remove_non_sfs_whiteouts() {
	local SFSPoints wh rel dir leaf covered keep p
	SFSPoints="$(grep -oE "/aufs/kernel-modules|/aufs/pup_init|/aufs/pup_ro|/aufs/pup_ro[0-9]+" /proc/mounts | sort -u)"

	echo "remove non-SFS whiteouts"
	find "$PUPSAVE" -mount \( -regex '.*/\.wh\.[^/]*' -type f \) |
	grep -v -E ".wh..wh.orph|.wh..wh.plnk|.wh..wh.aufs|.wh..wh..opq" |
	# IFS= keeps trailing blanks in names (otherwise such whiteouts are
	# never found again and never removed); the prefix is quoted so it is
	# stripped literally rather than as a glob pattern
	while IFS= read -r wh; do
		rel="${wh#"$PUPSAVE"}"
		dir="${rel%/*}"
		leaf="${rel##*/}"

		covered="${dir}/${leaf#.wh.}"	# path the whiteout hides
		keep=
		# $SFSPoints is split on purpose: one mount point per word
		for p in $SFSPoints; do
			if [ -e "$p/$covered" ] || [ -L "$p/$covered" ]; then
				keep=yes
				break		# one hit is enough
			fi
		done
		[ "$keep" ] || rm -f -- "$wh"
	done
}

# Delete merged files that are closed files, BUT only if disk is not full.
# Globals: DISKFULL (flag file; its presence skips the deletion),
#          MERGED_FILES (list file; always removed at the end)
remove_merged_files() {
	# "$DISKFULL" is quoted so the test cannot break on an odd path; the
	# former "> /dev/null" on the test redirected nothing and is gone
	if [ ! -f "$DISKFULL" ]; then
		echo "removing merged top-layer files"
		# list_closed_files emits NUL-separated paths
		list_closed_files | xargs -0 rm -rf --
	fi
	rm -f "$MERGED_FILES"
}

# Force re-evaluation of all the layers by remounting the aufs root with
# udba=reval. Returns the status of the mount command.
force_aufs_reval() {
	printf '%s\n' "re-evaluating layers"
	# mount arguments, kept in the function's own positional parameters
	set -- -i -t aufs -o remount,udba=reval aufs /
	busybox mount "$@"
}

# Warn that the save file is full, if necessary.
# Globals: DISKFULL (flag file; nothing happens unless it exists),
#          DISPLAY (when non-empty the warning is shown with Xdialog)
# The message is printed on stdout when there is no display or when Xdialog
# fails.
warn_disk_full() {
	local msg="Your save file is full, please copy important items manually elsewhere."

	# quoted so that the test cannot break on an odd path
	[ -f "$DISKFULL" ] || return 0

	# explicit if/else instead of an "a && b || c" chain; the fallback to
	# plain text when Xdialog fails is kept
	if [ "$DISPLAY" ] && Xdialog --infobox "$msg" 0 0 10000; then
		return 0
	fi
	echo "$msg"
}


################### main ######################

# run at highest priority
renice -n -20 "$$" > /dev/null

# 1. make the layers consistent, 2. copy top to bottom, 3. repair what the
# non-atomic copy may have left behind
clean_whiteouts
merge_layers
fix_bottom_layer

# optional step, enabled by a non-empty CLEAR_NON_SFS_WHITEOUT
if [ -n "$CLEAR_NON_SFS_WHITEOUT" ]; then
	remove_non_sfs_whiteouts
fi

# drop what was merged from the top layer, refresh aufs, report a full disk
remove_merged_files
force_aufs_reval
warn_disk_full
