#!/bin/bash
# Copyright 2011-2019 Gentoo Authors; Distributed under the GPL v2
# might be earlier copyright, no history available

# 1) Create the tarball
# 2) Sanity check the tarball size and bail out if it appears abnormal.
# 3) Compress tarball(bzip2,xz)
# 4) sign
# 5) delta generation
# 6) create some symlinks
# 7) tidy up
# 8) clean up old deltas

umask 022
RSYNC_VARS=${RSYNC_VARS:-/usr/local/bin/mastermirror/rsync-gen.vars}
# The vars file is expected to provide BASE, UPLOAD, FINALDIR_repo_gentoo,
# SIGNKEYID_snapshot and write_time_log, all of which are used below.
source "${RSYNC_VARS}" || exit 3

# locations (if used in multiple files, please move to rsync-gen.vars file)
MASTER="${FINALDIR_repo_gentoo}" # the master gentoo-x86 copy
TEMP="${BASE}/snapshots-tmp/" # working directory
HISTORY="7" # number of days of previous snapshots to keep
DELTA_UPLOAD="${UPLOAD}/deltas/"

[[ -d ${TEMP} ]] || mkdir "${TEMP}"
[[ -d ${UPLOAD} ]] || mkdir "${UPLOAD}"
[[ -d ${DELTA_UPLOAD} ]] || mkdir "${DELTA_UPLOAD}"

write_time_log "---------------------------------------------------------"
write_time_log "START ENTIRE SCRIPT $(date -u)"

# used to name the file
DELTA_BASE=$(/bin/date -d '-2 day' +%Y%m%d)
DELTA_FILENAME="portage-${DELTA_BASE}.tar.bz2"
YESTERDAY=$(/bin/date -d yesterday +%Y%m%d)
FILENAME="portage-${YESTERDAY}.tar.bz2"
FILENAME_NEW="gentoo-${YESTERDAY}.tar.xz"

# Parallel compressors can use a LOT of cpu, be nice about it
# (expanded unquoted on purpose so that it splits into command + arguments)
NICE="nice -n 10"

SIGNKEYID=${SIGNKEYID_snapshot}

# Verify signing key is available
if ! gpg --list-keys "${SIGNKEYID}" | grep -sq "${SIGNKEYID}"; then
  echo "${SIGNKEYID} not imported! exiting"
  exit 1
fi

# Use the snapshot named after DELTA_BASE (two days ago); if it is
# unavailable, use the 'latest' snapshot.
# If both are missing, bail out.
POTENTIAL_SNAPSHOTS=(
  "${UPLOAD}/${DELTA_FILENAME}"
  "${UPLOAD}/portage-latest.tar.bz2"
  FAIL
)
for previous_snapshot in "${POTENTIAL_SNAPSHOTS[@]}"; do
  [[ -e ${previous_snapshot} ]] && break
done
if [[ ${previous_snapshot} == FAIL ]]; then
  # Cut FAIL out of the printed list, to ease debugging.
  echo "Previous snapshots do not exist: ${POTENTIAL_SNAPSHOTS[*]:0:2}"
  exit 1
fi

# Find fastest BZIP2
# lbzip2,pbzip2 default to multiple threads
# emerge-delta-webrsync relies on verifying signature of tarball after
# re-compressing it with bzip2, therefore failing if the signature
# was done on lbzip2 or pbzip2 compressed tarball, #573908
for BZIP2_PROG in bzip2 FAIL; do
  type "${BZIP2_PROG}" >/dev/null 2>&1 && break
done
if [[ ${BZIP2_PROG} == FAIL ]]; then
  echo "Could not find any BZIP2" 1>&2
  exit 1
fi

# Find fastest XZ
# pixz appends some data and leads to SIGPIPE, #573642
for XZ_PROG in xz FAIL; do
  type "${XZ_PROG}" >/dev/null 2>&1 && break
done
if [[ ${XZ_PROG} == FAIL ]]; then
  echo "Could not find any xz" 1>&2
  exit 1
fi
# Newer 'xz' supports threads as well, but defaults to single-threaded
if ${XZ_PROG} --help | grep -sq threads=NUM; then
  XZ_PROG="${XZ_PROG} -T 0"
fi

# working dir
# All tarballs and checksums below are created with relative names, so do
# not continue if the working directory cannot be entered.
cd "${TEMP}" || exit 1

# Tarball generation
write_time_log "START TARBALL $(date -u)"

# 1a) Tarball prep:
write_time_log "START TARBALL(prep) $(date -u)"
# Build exclusion list
EXCLUSION_LIST="$(mktemp -p "${TEMP}" snapshot-exclude.XXXXXXXXXX)"
/usr/local/bin/mastermirror/print-exclusion-list.sh "${MASTER}" >"${EXCLUSION_LIST}"
write_time_log "END TARBALL(prep) $(date -u)"

# 1b) Create the tarball
COMMON_TAR_OPTIONS=(
  # Force a small block size
  '--blocking-factor=1'
  '--record-size=512'
  # Tar format rationale:
  # ---------------------
  # Longest directory prefix, 94 chars:
  # gentoo-YYYYMMDD/profiles/default/linux/powerpc/ppc64/17.0/64bit-userland/little-endian/systemd/
  # Longest path, 140 chars:
  # gentoo-YYYYMMDD/gnome-extra/gnome-shell-extension-applications-overview-tooltip/gnome-shell-extension-applications-overview-tooltip-6.ebuild
  # Longest filename, 91 chars:
  # spirv-tools-2019.10_pre20191027-Respect-CMAKE_INSTALL_LIBDIR-in-installed-CMake-files.patch
  #
  # These lengths require that a tar format with unlimited length is used, the
  # split-255 layout of ustar is not guaranteed to be long enough.
  #
  # The tar formats with unlimited length are: gnutar, posix
  #
  # The posix tar format has additional 1K overhead per entry, and the Gentoo
  # repo has ~160k entries (files & dirs), which adds up to 160M extra in the
  # tarball.
  # The differ/bdelta toolchain is also confirmed to work on posix tar
  # format.
  '--format=gnu'
  # Sorting by name produces consistent ordering and helps compression of
  # related content. Custom ordering might further improve ordering in future
  # (eg all metadata.xml first)
  '--sort=name'
  # Force ownership of content:
  '--owner=portage'
  '--group=portage'
  # Excluded content:
  '--no-wildcards'
  "--exclude-from=${EXCLUSION_LIST}"
  # Do not capture any xattr/acl info at all.
  '--no-acls'
  '--no-xattrs'
  '--no-selinux'
  # Include a volume ID for tracing
  # volume header is NOT supported by:
  # - Docker https://bugs.gentoo.org/631644
  # - tarsync https://bugs.gentoo.org/631616
  #"--label=${FILENAME%.bz2}"
  # do everything relative to the destination
  "--directory=${MASTER}"
)

#######################################
# Create one uncompressed tarball of ${MASTER} unless it already exists.
# Globals:   COMMON_TAR_OPTIONS (read)
# Arguments: $1 - label used in the failure message, e.g. "tar,old"
#            $2 - output tarball name (relative to the working directory)
#            $3 - top-level directory name to use inside the tarball
# Returns:   0 on success or if the tarball already exists; exits the
#            script with status 1 if tar fails.
#######################################
build_tarball() {
  local label=$1 tarball=$2 topdir=$3
  local rc

  [[ -f ${tarball} ]] && return 0

  # The '.' source argument needs to match the '^\.' in the transform.
  tar \
    "${COMMON_TAR_OPTIONS[@]}" \
    "--transform=s,^\\.,${topdir},g" \
    '--create' \
    "--file=${tarball}" \
    .
  rc=$?
  if [[ ${rc} -ne 0 ]]; then
    echo "TARBALL(${label}) failed!"
    echo "TARBALL(${label}) failed!" 1>&2
    # Do not leave a partial tarball behind: the existence check above
    # would otherwise reuse it on the next run.
    rm -f -- "${tarball}"
    exit 1
  fi
}

# create the tarball and move it to the right location
write_time_log "START TARBALL(tar,old) $(date -u)"
build_tarball 'tar,old' "${FILENAME%.bz2}" 'portage'
write_time_log "END TARBALL(tar,old) $(date -u)"

# The checksum of the uncompressed tarball is shared by the .bz2 and .xz
# variants, so it is written once per compressed name.
write_time_log "START TARBALL(umd5,old) $(date -u)"
if [[ ! -f "${FILENAME}.umd5sum" || ! -f "${FILENAME%.bz2}.xz.umd5sum" ]]; then
  md5sum "${FILENAME%.bz2}" \
    | tee "${FILENAME%.bz2}.bz2.umd5sum" "${FILENAME%.bz2}.xz.umd5sum" >/dev/null
fi
write_time_log "END TARBALL(umd5,old) $(date -u)"
# end 1b)

# 1c) Also create new-style tarball
write_time_log "START TARBALL(tar,new) $(date -u)"
# The newer tarball differs in the following ways:
# Filename 'portage-YYYYMMDD' -> 'gentoo-YYYYMMDD'
# Path prefix 'portage' -> 'gentoo-YYYYMMDD'
#
# Earlier code used tar-transform-names.pl as a wrapper around Perl
# Archive::Tar::Stream, but it was found that the Archive::Tar::Stream
# codebase did not handle gnutar format correctly.
# https://bugs.gentoo.org/703460
#
# While the stream processing was MUCH faster (because it didn't traverse
# the filesystem at all), it broke on very long filenames that ARE present
# in the Gentoo repo:
#/usr/local/bin/mastermirror//tar-transform-names.pl \
#  --input-file "${FILENAME%.bz2}" \
#  --output-file "${FILENAME_NEW%.xz}" \
#  --regex "s/^portage/${FILENAME_NEW%%.*}/"
build_tarball 'tar,new' "${FILENAME_NEW%.xz}" "${FILENAME_NEW%%.*}"
write_time_log "END TARBALL(tar,new) $(date -u)"

write_time_log "START TARBALL(umd5,new) $(date -u)"
if [[ ! -f "${FILENAME_NEW}.umd5sum" ]]; then
  md5sum "${FILENAME_NEW%.xz}" > "${FILENAME_NEW}.umd5sum"
fi
write_time_log "END TARBALL(umd5,new) $(date -u)"
# end 1c)

# end 1)
write_time_log "END TARBALL $(date -u)"

# 2) Sanity check the tarball size and bail out if it appears abnormal.
# The size of the fresh uncompressed tarball is compared with the
# uncompressed size of the previous snapshot.
write_time_log "START SIZE SANITY $(date -u)"

# UPLOAD is combined with file names in several ways below; strip any
# trailing slash once so every path can be built as "${upload_dir}/name".
upload_dir="${UPLOAD%/}"

current_size=$(stat -c '%s' "${FILENAME%.bz2}")
case "${previous_snapshot}" in
  *.bz2) previous_size=$(${BZIP2_PROG} -k -d -c "${previous_snapshot}" | wc -c) ;;
  *.xz) previous_size=$(${XZ_PROG} -k -d -c "${previous_snapshot}" | wc -c) ;;
  *.tar) previous_size=$(stat -c '%s' "${previous_snapshot}") ;;
  *) previous_size=0 ;;
esac
# If a size could not be determined, count it as 0 so the arithmetic below
# always has valid operands; a missing current tarball then shows up as a
# 100% decrease instead of slipping through.
current_size=${current_size:-0}
previous_size=${previous_size:-0}

if (( current_size < previous_size )); then
  size_difference=$(( previous_size - current_size ))
  # Integer ratio: <2 means the loss exceeds half of the previous size,
  # <5 means it exceeds a fifth.
  difference_ratio=$(( previous_size / size_difference ))
  if (( difference_ratio < 2 )); then
    echo "Snapshot size has decreased by more than 50% in one day!!!"
    echo "${FILENAME} ${current_size} bytes"
    echo "${previous_snapshot} ${previous_size} bytes"
    exit 1
  elif (( difference_ratio < 5 )); then
    echo "Snapshot size has decreased by more than 20% in one day!!!"
    echo "${FILENAME} ${current_size} bytes"
    echo "${previous_snapshot} ${previous_size} bytes"
    # Make this non-fatal while we recover more space.
    #exit 1
  fi
fi
write_time_log "END SIZE SANITY $(date -u)"
# end 2)

#######################################
# xz-compress a tarball to "<tarball>.xz" unless that file already exists.
# pixz, pxz, xz all differ in filename generation
#   xz:   .tar -> .tar.xz
#   pixz: .tar -> .tpxz
#   pxz:  .tar -> .txz
# To avoid this, be explicit by using IO.
# Globals:   NICE, XZ_PROG (read; expanded unquoted so they split into
#            command + arguments)
# Arguments: $1 - uncompressed tarball name
# Returns:   0 on success; exits the script with xz's status on failure.
#######################################
compress_xz() {
  local tarball=$1
  local rc

  [[ -f "${tarball}.xz" ]] && return 0

  ${NICE} ${XZ_PROG} -9 -e <"${tarball}" >"${tarball}.xz"
  rc=$?
  if [[ ${rc} -ne 0 ]]; then
    # The redirection creates the output even when xz fails; remove it so
    # the existence check above does not accept it on the next run.
    rm -f -- "${tarball}.xz"
    exit "${rc}"
  fi
}

#######################################
# Point the "<prefix>-latest.tar.<ext>" names in the upload directory at a
# freshly uploaded snapshot: symlinks for the file and its signature, and a
# rewritten copy of the md5sum file that names the "latest" file.
# Globals:   upload_dir (read)
# Arguments: $1 - snapshot file name (already present in the upload dir)
#            $2 - name prefix, e.g. "portage" or "gentoo"
# Returns:   0 on success; exits the script on the first failing command.
#######################################
publish_latest() {
  local snapshot=$1 prefix=$2
  local ext=${snapshot##*.}
  local latest_name="${prefix}-latest.tar.${ext}"
  local latest="${upload_dir}/${latest_name}"

  ln -sf "${snapshot}" "${latest}" || exit $?
  rm -f "${latest}.md5sum" || exit $?
  sed "s/${snapshot}\$/${latest_name}/" "${upload_dir}/${snapshot}.md5sum" \
    > "${latest}.md5sum" || exit $?
  ln -sf "${snapshot}.gpgsig" "${latest}.gpgsig" || exit $?
}

# 3) Compress tarball(bzip2,xz)
write_time_log "START COMPRESS $(date -u)"
# This happens only AFTER the sanity check
# 3a) bzip2 of old tarball
write_time_log "START COMPRESS(bzip2,old) $(date -u)"
if [[ ! -f "${FILENAME%.bz2}.bz2" ]]; then
  ${NICE} ${BZIP2_PROG} -k9 "${FILENAME%.bz2}" || exit $?
fi
write_time_log "END COMPRESS(bzip2,old) $(date -u)"

# 3b) xz of old tarball
write_time_log "START COMPRESS(xz,old) $(date -u)"
compress_xz "${FILENAME%.*}"
write_time_log "END COMPRESS(xz,old) $(date -u)"

# 3c) xz of new tarball
write_time_log "START COMPRESS(xz,new) $(date -u)"
compress_xz "${FILENAME_NEW%.*}"
write_time_log "END COMPRESS(xz,new) $(date -u)"
write_time_log "END COMPRESS $(date -u)"
# end 3)

# 4) sign
write_time_log "START SIGN $(date -u)"
for f in "${FILENAME}" "${FILENAME%.*}".xz "${FILENAME_NEW}"; do
  if [[ ! -f "${upload_dir}/${f}.umd5sum" ]]; then
    cp "${f}.umd5sum" "${upload_dir}/${f}.umd5sum" || exit $?
  fi
  # Always (re)create the checksum: the mv below needs it even when the
  # .umd5sum was already uploaded by an earlier, interrupted run.
  md5sum "${f}" > "${f}.md5sum" || exit $?
  if [[ ! -f "${f}.gpgsig" ]]; then
    gpg --batch -u "${SIGNKEYID}" --armor --detach-sign \
      --output "${f}.gpgsig" "${f}" || exit $?
  fi
  mv "${f}" "${f}.md5sum" "${f}.gpgsig" "${upload_dir}/" || exit $?
done
write_time_log "END SIGN $(date -u)"
# end 4)

# 5) delta generation
# Delta is not generated for new filename at this time
# No step here aborts the script on failure (unchanged from before).
write_time_log "START DELTA $(date -u)"
PATCH=snapshot-${DELTA_BASE}-${YESTERDAY}.patch.bz2
if [[ ! -f "${PATCH}" ]]; then
  SNAPSHOT_BASE="${TEMP}"/$(basename "${previous_snapshot%.bz2}")-snapshotbase
  ${NICE} ${BZIP2_PROG} -dkc "${previous_snapshot}" > "${SNAPSHOT_BASE}"
  /usr/bin/differ -f bdelta \
    "${SNAPSHOT_BASE}" \
    "${FILENAME%.bz2}" \
    "${PATCH%.bz2}"
  # The decompressed base is only an input to differ; drop it right away
  # instead of leaving a full-size tarball in the working directory.
  rm -f -- "${SNAPSHOT_BASE}"
  ${NICE} ${BZIP2_PROG} -9 "${PATCH%.bz2}"
  md5sum "${PATCH}" > "${PATCH}.md5sum"
  chmod 644 "${PATCH}"{,.md5sum}
  mv "${PATCH}"{,.md5sum} "${DELTA_UPLOAD}"
fi
write_time_log "END DELTA $(date -u)"
# end 5)

# 6) create some symlinks
write_time_log "START SYMLINK $(date -u)"
cd "${upload_dir}/" || exit $?
for f in "${FILENAME}" "${FILENAME%.*}".xz; do
  publish_latest "${f}" portage
done
publish_latest "${FILENAME_NEW}" gentoo
write_time_log "END SYMLINK $(date -u)"
# end 6)

# 7) tidy up
write_time_log "START CLEANUP $(date -u)"
# Cleanup immediate stuff
stale_files=(
  "${TEMP}"/orig
  "${TEMP}"/"${FILENAME%.bz2}"{.bz2,.xz,}{,.umd5sum}
  "${TEMP}"/"${FILENAME_NEW%.xz}"{.bz2,.xz,}{,.umd5sum}
)
# EXCLUSION_LIST comes from 'mktemp -p ${TEMP}' and therefore already
# carries a directory part; use only its file name below ${TEMP}.
if [[ -n ${EXCLUSION_LIST} ]]; then
  stale_files+=( "${TEMP}/${EXCLUSION_LIST##*/}" )
fi
rm -v -f "${stale_files[@]}" || exit $?

# Cleanup older stuff that might have leaked
/usr/bin/find \
  "${TEMP}" \
  -regextype egrep \
  -maxdepth 1 \
  -type f \
  -mtime +0 \
  \( \
    -regex '.*/(portage|gentoo)-[0-9]{6}.*' \
    -o \
    -name 'snapshot-exclude.*' \
    -o \
    -name orig \
  \) \
  -delete

# Cleanup old snapshots
/usr/bin/find \
  "${upload_dir}/" \
  -maxdepth 1 \
  -type f \
  -mtime "+${HISTORY}" \
  -delete

write_time_log "END CLEANUP $(date -u)"
# end 7)

# 8) clean up old deltas
# Delta is not generated for new filename at this time
write_time_log "START CLEANUP DELTA $(date -u)"
uploaded_size=$(stat -c '%s' "${upload_dir}/${FILENAME}")
/usr/local/bin/mastermirror/clean-old-deltas.py \
  "${DELTA_UPLOAD}" "${YESTERDAY}" "${uploaded_size}" > /dev/null
write_time_log "END CLEANUP DELTA $(date -u)"
# end 8)

write_time_log "END ENTIRE SCRIPT $(date -u)"