#!/bin/bash
# Copyright 2011-2019 Gentoo Authors; Distributed under the GPL v2
# might be earlier copyright, no history available
#
# Builds the daily tree snapshot tarballs and their deltas:
# 1) Create the tarball
# 2) Sanity check the tarball size and bail out if it appears abnormal.
# 3) Compress tarball(bzip2,xz)
# 4) sign
# 5) delta generation
# 6) create some symlinks
# 7) tidy up
# 8) clean up old deltas
#
# BASE, UPLOAD, FINALDIR_repo_gentoo and SIGNKEYID_snapshot are not set in
# this file; they are presumably provided by rsync-gen.vars, as is the
# write_time_log helper -- confirm against that file.

umask 022
source /usr/local/bin/mastermirror/rsync-gen.vars

# locations (if used in multiple files, please move to rsync-gen.vars file)
MASTER="${FINALDIR_repo_gentoo}" # the master gentoo-x86 copy
TEMP="${BASE}/snapshots-tmp/"    # working directory
HISTORY="7"                      # number in days of previous snapshots to keep
DELTA_UPLOAD="${UPLOAD}/deltas/"

# Refuse to continue with empty locations: several later steps delete files
# relative to these paths.
: "${BASE:?BASE is not set}"
: "${UPLOAD:?UPLOAD is not set}"
: "${MASTER:?FINALDIR_repo_gentoo is not set}"

[[ -d "${TEMP}" ]] || mkdir "${TEMP}"
[[ -d "${UPLOAD}" ]] || mkdir "${UPLOAD}"
[[ -d "${DELTA_UPLOAD}" ]] || mkdir "${DELTA_UPLOAD}"

write_time_log "---------------------------------------------------------"
write_time_log "START ENTIRE SCRIPT $(date -u)"

# used to name the file
DELTA_BASE=$(/bin/date -d '-2 day' +%Y%m%d)
DELTA_FILENAME="portage-${DELTA_BASE}.tar.bz2"
YESTERDAY=$(/bin/date -d yesterday +%Y%m%d)
FILENAME="portage-${YESTERDAY}.tar.bz2"
FILENAME_NEW="gentoo-${YESTERDAY}.tar.xz"

# Parallel compressors can use a LOT of cpu, be nice about it
NICE=(nice -n 10)

SIGNKEYID=${SIGNKEYID_snapshot}

# Verify signing key is available
if [[ -z "${SIGNKEYID}" ]] ||
  ! gpg --list-keys "${SIGNKEYID}" | grep -q -- "${SIGNKEYID}"; then
  echo "${SIGNKEYID} not imported! exiting"
  exit 1
fi

# Use the snapshot from two days ago (the delta base); if it is unavailable,
# use the 'latest' snapshot.
# If both are missing, bail out.
POTENTIAL_SNAPSHOTS=(
  "${UPLOAD}/${DELTA_FILENAME}"
  "${UPLOAD}/portage-latest.tar.bz2"
  FAIL
)
for previous_snapshot in "${POTENTIAL_SNAPSHOTS[@]}"; do
  [[ -e "${previous_snapshot}" ]] && break
done
if [[ "${previous_snapshot}" == FAIL ]]; then
  # Cut FAIL out of the printed list, to ease debugging.
  echo "Previous snapshots do not exist: ${POTENTIAL_SNAPSHOTS[*]:0:2}"
  exit 1
fi

# Find BZIP2
# lbzip2,pbzip2 default to multiple threads
# emerge-delta-webrsync relies on verifying signature of tarball after
# re-compressing it with bzip2, therefore failing if the signature
# was done on lbzip2 or pbzip2 compressed tarball, #573908
for BZIP2_PROG in bzip2 FAIL; do
  type "${BZIP2_PROG}" >/dev/null 2>&1 && break
done
if [[ "${BZIP2_PROG}" == FAIL ]]; then
  echo "Could not find any BZIP2" 1>&2
  exit 1
fi

# Find XZ
# pixz appends some data and leads to SIGPIPE, #573642
for XZ_PROG in xz FAIL; do
  type "${XZ_PROG}" >/dev/null 2>&1 && break
done
if [[ "${XZ_PROG}" == FAIL ]]; then
  echo "Could not find any xz" 1>&2
  exit 1
fi
# Newer 'xz' supports threads as well, but defaults to single-threaded
XZ_OPTS=()
if "${XZ_PROG}" --help | grep -sq threads=NUM; then
  XZ_OPTS=(-T 0)
fi

# working dir; everything below uses relative file names, so a failed cd
# must not be ignored.
cd "${TEMP}" || exit 1

# Tarball generation
write_time_log "START TARBALL $(date -u)"

# 1a) Tarball prep:
write_time_log "START TARBALL(prep) $(date -u)"
# Build exclusion list
EXCLUSION_LIST="$(mktemp -p "${TEMP}" snapshot-exclude.XXXXXXXXXX)"
/usr/local/bin/mastermirror/print-exclusion-list.sh "${MASTER}" >"${EXCLUSION_LIST}"
write_time_log "END TARBALL(prep) $(date -u)"

# 1b) Create the tarball
# create the tarball and move it to the right location
write_time_log "START TARBALL(tar,old) $(date -u)"
if [[ ! -f "${FILENAME%.bz2}" ]]; then
  TAR_OPTIONS=(
    # Force a small block size
    --blocking-factor=1
    --record-size=512
    # GNU tar format saves approximately 1K per file in the tarball over POSIX
    # format. Multiply ~170k files, and the savings are large.
    --format=gnu
    # Sorting by name produces consistent ordering and helps compression of
    # related content. Custom ordering might further improve ordering in future
    # (eg all metadata.xml first)
    --sort=name
    # Force ownership of content:
    --owner=portage
    --group=portage
    # Excluded content:
    --no-wildcards
    --exclude-from "${EXCLUSION_LIST}"
    # Do not capture any xattr/acl info at all.
    --no-acls
    --no-xattrs
    --no-selinux
    # Include a volume ID for tracing
    # volume header is not supported by:
    # - Docker https://bugs.gentoo.org/631644
    # - tarsync https://bugs.gentoo.org/631616
    # -V "${FILENAME%.bz2}"
    # do everything relative to the destination
    -C "${MASTER}"
    # The . needs to match the file argument
    --transform='s,^\.,portage,g'
    # The operation, destination, source arguments
    --create
    --file "${FILENAME%.bz2}"
    .
  )
  if ! tar "${TAR_OPTIONS[@]}"; then
    echo "Tar run failed!"
    exit 1
  fi
fi
rm -f "${EXCLUSION_LIST}" # Not needed after this
write_time_log "END TARBALL(tar,old) $(date -u)"

write_time_log "START TARBALL(umd5,old) $(date -u)"
# md5 of the UNCOMPRESSED tarball; only computed when not already present.
if [[ ! -f "${FILENAME}.umd5sum" ]]; then
  md5sum "${FILENAME%.bz2}" >"${FILENAME}.umd5sum"
fi
write_time_log "END TARBALL(umd5,old) $(date -u)"
# end 1b)

# 1c) Also create new-style tarball,
# but do it via transformation of old tarball
write_time_log "START TARBALL(transform,new) $(date -u)"
if [[ ! -f "${FILENAME_NEW%.xz}" ]]; then
  # The newer tarball differs in the following ways:
  # Filename 'portage-YYYYMMDD' -> 'gentoo-YYYYMMDD'
  # Path prefix 'portage' -> 'gentoo-YYYYMMDD'
  #
  # Instead of rebuilding the entire tarball, it's MUCH faster to consider it
  # a single IO stream and process it as such.
  # tar-transform-names.pl is a small wrapper around Perl Archive::Tar::Stream
  # that just changes filenames
  if ! /usr/local/bin/mastermirror//tar-transform-names.pl \
    --input-file "${FILENAME%.bz2}" \
    --output-file "${FILENAME_NEW%.xz}" \
    --regex "s/^portage/${FILENAME_NEW%%.*}/"; then
    echo "Tar transform run failed!"
    exit 1
  fi
fi
write_time_log "END TARBALL(transform,new) $(date -u)"

write_time_log "START TARBALL(umd5,new) $(date -u)"
if [[ ! -f "${FILENAME_NEW}.umd5sum" ]]; then
  md5sum "${FILENAME_NEW%.xz}" >"${FILENAME_NEW}.umd5sum"
fi
write_time_log "END TARBALL(umd5,new) $(date -u)"
# end 1c)

# end 1)
write_time_log "END TARBALL $(date -u)"

# 2) Sanity check the tarball size and bail out if it appears abnormal.
write_time_log "START SIZE SANITY $(date -u)"
current_size=$(stat -c '%s' "${FILENAME%.bz2}")
# Size of the previous snapshot once uncompressed, chosen by file suffix.
case "${previous_snapshot}" in
  *.bz2)
    previous_size=$("${BZIP2_PROG}" -k -d -c "${previous_snapshot}" | wc -c)
    ;;
  *.xz)
    previous_size=$("${XZ_PROG}" "${XZ_OPTS[@]}" -k -d -c "${previous_snapshot}" | wc -c)
    ;;
  *.tar)
    previous_size=$(stat -c '%s' "${previous_snapshot}")
    ;;
  *)
    previous_size=0
    ;;
esac
# An empty size means stat could not read the new tarball; nothing sane can
# be published in that case.
if [[ -z "${current_size}" ]]; then
  echo "Could not determine the size of ${FILENAME%.bz2}"
  exit 1
fi
if [[ "${current_size}" -lt "${previous_size}" ]]; then
  # current < previous here, so size_difference is strictly positive.
  size_difference=$((previous_size - current_size))
  difference_ratio=$((previous_size / size_difference))
  if [[ "${difference_ratio}" -lt 2 ]]; then
    echo "Snapshot size has decreased by more than 50% in one day!!!"
    echo "${FILENAME} ${current_size} bytes"
    echo "${previous_snapshot} ${previous_size} bytes"
    exit 1
  elif [[ "${difference_ratio}" -lt 5 ]]; then
    echo "Snapshot size has decreased by more than 20% in one day!!!"
    echo "${FILENAME} ${current_size} bytes"
    echo "${previous_snapshot} ${previous_size} bytes"
    # Make this non-fatal while we recover more space.
    #exit 1
  fi
fi
write_time_log "END SIZE SANITY $(date -u)"
# end 2)

# 3) compress the tarballs
write_time_log "START COMPRESS $(date -u)"
# This happens only AFTER the sanity check
# 3a) bzip2 of old tarball
write_time_log "START COMPRESS(bzip2,old) $(date -u)"
if [[ ! -f "${FILENAME%.bz2}.bz2" ]]; then
  "${NICE[@]}" "${BZIP2_PROG}" -k9 "${FILENAME%.bz2}"
fi
write_time_log "END COMPRESS(bzip2,old) $(date -u)"

# 3b) xz of old tarball
write_time_log "START COMPRESS(xz,old) $(date -u)"
if [[ ! -f "${FILENAME%.*}.xz" ]]; then
  # pixz, pxz, xz all differ in filename generation
  # xz: .tar -> .tar.xz
  # pixz: .tar -> .tpxz
  # pxz: .tar -> .txz
  #
  # To avoid this, be explicit by using IO.
  "${NICE[@]}" "${XZ_PROG}" "${XZ_OPTS[@]}" -9 -e \
    <"${FILENAME%.*}" >"${FILENAME%.*}.xz" || exit $?
fi
write_time_log "END COMPRESS(xz,old) $(date -u)"

# 3c) xz of new tarball
write_time_log "START COMPRESS(xz,new) $(date -u)"
if [[ ! -f "${FILENAME_NEW%.*}.xz" ]]; then
  # Same explicit-IO approach as for the old tarball, see 3b).
  "${NICE[@]}" "${XZ_PROG}" "${XZ_OPTS[@]}" -9 -e \
    <"${FILENAME_NEW%.*}" >"${FILENAME_NEW%.*}.xz" || exit $?
fi
write_time_log "END COMPRESS(xz,new) $(date -u)"

write_time_log "END COMPRESS $(date -u)"
# end 3)

# 4) sign
write_time_log "START SIGN $(date -u)"
for f in "${FILENAME}" "${FILENAME%.*}".xz "${FILENAME_NEW}"; do
  # NOTE(review): this script only writes ${FILENAME}.umd5sum and
  # ${FILENAME_NEW}.umd5sum; nothing visible here creates a .umd5sum for the
  # portage .tar.xz file, so the cp below looks like it would fail for that
  # entry on a fresh run -- confirm before changing.
  if [[ ! -f "${UPLOAD}${f}.umd5sum" ]]; then
    cp "${f}.umd5sum" "${UPLOAD}${f}.umd5sum" || exit $?
    md5sum "${f}" >"${f}.md5sum" || exit $?
  fi
  if [[ ! -f "${f}.gpgsig" ]]; then
    gpg --batch -u "${SIGNKEYID}" --armor --detach-sign \
      --output "${f}.gpgsig" "${f}" || exit $?
  fi
  mv "${f}" "${f}.md5sum" "${f}.gpgsig" "${UPLOAD}"/ || exit $?
done
write_time_log "END SIGN $(date -u)"
# end 4)

# 5) delta generation
# Delta is not generated for new filename at this time
write_time_log "START DELTA $(date -u)"
PATCH="snapshot-${DELTA_BASE}-${YESTERDAY}.patch.bz2"
if [[ ! -f "${PATCH}" ]]; then
  # 'orig' is the uncompressed previous snapshot the delta is computed from.
  "${NICE[@]}" "${BZIP2_PROG}" -dkc "${previous_snapshot}" >orig
  /usr/bin/differ -f bdelta orig "${FILENAME%.bz2}" "${PATCH%.bz2}"
  "${NICE[@]}" "${BZIP2_PROG}" -9 "${PATCH%.bz2}"
  md5sum "${PATCH}" >"${PATCH}.md5sum"
  chmod 644 "${PATCH}" "${PATCH}.md5sum"
  mv "${PATCH}" "${PATCH}.md5sum" "${DELTA_UPLOAD}"
fi
write_time_log "END DELTA $(date -u)"
# end 5)

# 6) create some symlinks
write_time_log "START SYMLINK $(date -u)"
cd "${UPLOAD}" || exit $?
for f in "${FILENAME}" "${FILENAME%.*}".xz; do
  ext=${f##*.}
  ln -sf "${f}" "${UPLOAD}portage-latest.tar.${ext}" || exit $?
  rm -f "${UPLOAD}portage-latest.tar.${ext}.md5sum" || exit $?
  # The md5sum file names the dated tarball; rewrite it to the 'latest' name.
  sed "s/${f}\$/portage-latest.tar.${ext}/" "${UPLOAD}${f}.md5sum" \
    >"${UPLOAD}portage-latest.tar.${ext}.md5sum" || exit $?
  ln -sf "${f}.gpgsig" "${UPLOAD}portage-latest.tar.${ext}.gpgsig" || exit $?
done
for f in "${FILENAME_NEW}"; do
  ext=${f##*.}
  ln -sf "${f}" "${UPLOAD}gentoo-latest.tar.${ext}" || exit $?
  rm -f "${UPLOAD}gentoo-latest.tar.${ext}.md5sum" || exit $?
  sed "s/${f}\$/gentoo-latest.tar.${ext}/" "${UPLOAD}${f}.md5sum" \
    >"${UPLOAD}gentoo-latest.tar.${ext}.md5sum" || exit $?
  ln -sf "${f}.gpgsig" "${UPLOAD}gentoo-latest.tar.${ext}.gpgsig" || exit $?
done
write_time_log "END SYMLINK $(date -u)"
# end 6)

# 7) tidy up
write_time_log "START CLEANUP $(date -u)"
# The rm below uses relative names, so it must only run inside TEMP.
cd "${TEMP}" || exit $?
rm -f orig \
  "${FILENAME%.bz2}" \
  "${FILENAME_NEW%.xz}" \
  "${FILENAME}.umd5sum" \
  "${FILENAME_NEW}.umd5sum" ||
  exit $?
# Drop regular files directly in UPLOAD that are older than HISTORY days.
/usr/bin/find "${UPLOAD}" -maxdepth 1 -mtime "+${HISTORY}" -type f -print0 |
  /usr/bin/xargs -0 /bin/rm -f
write_time_log "END CLEANUP $(date -u)"
# end 7)

# 8) clean up old deltas
# Delta is not generated for new filename at this time
write_time_log "START CLEANUP DELTA $(date -u)"
# The size argument is deliberately left unquoted so that a failed stat
# passes no argument at all, exactly as before.
# shellcheck disable=SC2046
/usr/local/bin/mastermirror/clean-old-deltas.py "${DELTA_UPLOAD}" "${YESTERDAY}" \
  $(stat -c '%s' "${UPLOAD}/${FILENAME}") >/dev/null
write_time_log "END CLEANUP DELTA $(date -u)"
# end 8)

write_time_log "END ENTIRE SCRIPT $(date -u)"