#!/bin/bash
# Copyright 2005-2015 Gentoo Foundation; Distributed under the GPL v2
# might be earlier copyright, no history available

# 0) Make a backup in case there is something wrong with server side
# 1) Update rsync checkouts or exports from cvs.g.o::mastermirror-staging
#	- cvs.g.o::mastermirror-staging is populated by script mastermirror-staging.sh
# 1b) rsync EXPORTS/gentoo-x86 to STAGEDIR_repo_gentoo
# 1c) source (bash -n) eclasses to check for syntax errors
# 2) generate metadata (egencache)
# 3) place dtd info & xml schemas in STAGEDIR_repo_gentoo
# 4) place glsa's in STAGEDIR_repo_gentoo
# 5) place news in STAGEDIR_repo_gentoo
# 6) place projects.xml in STAGEDIR_repo_gentoo
# 7) rsync from STAGEDIR_repo_gentoo to FINALDIR_repo_gentoo
# 8) cache generated data
# 9) rsync from STAGEDIR_repo_gentoo to REPODIR

### testing

################################################################################
# NOTE: You MUST run git-restore-mtime -m on the git checkouts hosted in
#       git.gentoo.org::mastermirror-staging/ or this WILL NOT WORK.
#       You can get git-restore-mtime here:
#       https://github.com/MestreLion/git-tools/blob/master/git-restore-mtime
################################################################################

umask 022
source /usr/local/bin/mastermirror/rsync-gen.vars

# BINDIR defaults to the directory this script was started from.
BINDIR=${BINDIR:=$(dirname -- "$0")}
SIGNKEYID=${SIGNKEYID_rsync}

# timelog_start and timelog___end have names of equal length
# (presumably so that paired call sites line up -- the original note here
# was cut short).

# Print one fixed-width log record to stdout.
#   $1 - record tag, left-justified in 5 columns (this file passes START/END)
#   $2 - description of the step, left-justified in 22 columns
# The third field is "@<epoch seconds>/<output of date -u>", right-justified
# in 40 columns. All working variables are local so that the function does
# not overwrite same-named script-level variables (notably 'ts').
function _log() {
	local what_width=22
	local time_width=40
	local ts
	ts="@$(date +%s)/$(date -u)"
	printf "%-5s %-${what_width}s %${time_width}s\n" "$1" "$2" "$ts"
}

# Print a START record for the step named in $1.
function timelog_start() {
	_log "START" "$1"
}

# Print an END record for the step named in $1.
function timelog___end() {
	_log "END" "$1"
}

# Print a separator line of 70 dashes.
function timelog_MARKER() {
	local rule
	# Build 70 spaces with printf, then turn every space into a dash.
	printf -v rule '%70s' ''
	printf '%s\n' "${rule// /-}"
}

# We want to exclude many RCS files, but we have some exceptions to the 'cvs-exclude' list:
# - 'core' is actually a valid package name that we do use for dev-ml/core.
# - '*.old' is used by 'net-p2p/freenet/files/freenet.old'
# NOTE(review): the single quotes around *.old are literal characters inside
# this double-quoted string. A plain unquoted $RSYNC_GIT_EXCLUDE expansion does
# word splitting but no quote removal, so rsync would be handed the pattern
# with the quote characters still attached.
RSYNC_GIT_EXCLUDE="--include core/ --include '*.old' --cvs-exclude --exclude=.gitignore"
#--filter='dir-merge /.cvsignore' --filter='exclude .cvsignore'
#--filter='dir-merge /.gitignore' --filter='exclude .gitignore'

# sanity checking: create any working directory that does not exist yet
[[ -d ${EXPORTS} ]] || mkdir "${EXPORTS}"
[[ -d ${STAGEDIR_repo_gentoo} ]] || mkdir "${STAGEDIR_repo_gentoo}"
[[ -d ${FINALDIR_repo_gentoo} ]] || mkdir "${FINALDIR_repo_gentoo}"
[[ -d ${REPODIR} ]] || mkdir "${REPODIR}"
[[ -d ${LOGDIR} ]] || mkdir "${LOGDIR}"
[[ -d ${BASE}/tmp ]] || mkdir "${BASE}/tmp"

TIMESLOG="${LOGDIR}/rsync-gen-times.log"

# Pick the first executable candidate: whatever is on PATH, then the copy
# shipped under /usr/share/rsync. MISSING is a sentinel that is only left in
# $atomic_rsync when no candidate was executable.
for atomic_rsync in "$(command -v atomic-rsync 2>/dev/null)" /usr/share/rsync/atomic-rsync MISSING; do
	[[ -x ${atomic_rsync} ]] && break
done
if [[ ${atomic_rsync} == "MISSING" ]]; then
	echo "$0: Cannot do final atomic rsync into place, atomic-rsync tool is missing"
	exit 1
fi

# Copy stdin to the end of $TIMESLOG and forward it to syslog
# (tag rsync-gen-times, priority INFO).
function timelogger() {
	tee -a "$TIMESLOG" | logger -t rsync-gen-times -p INFO
}

TIME_ENTIRE_START=$(date -u +%s)
timelog_MARKER | timelogger
timelog_start "ENTIRE SCRIPT" | timelogger

# Verify signing key is available: the key id must be non-empty and must show
# up in the gpg key listing.
if [[ -z ${SIGNKEYID} ]] || ! gpg --list-keys "${SIGNKEYID}" | grep -qF -- "${SIGNKEYID}"; then
	echo "${SIGNKEYID} not imported! exiting"
	exit 1
fi

# 0) Make a backup in case there is something wrong with server side
#echo "START BACKUP $(date -u)" | timelogger
#cp -al ${FINALDIR_repo_gentoo}/ ${FINALDIR_repo_gentoo}.bak/
#echo "END BACKUP $(date -u)" | timelogger

# 1) Update checkouts from git.g.o
RSYNC="/usr/bin/rsync"
timelog_start "STAGING FROM GIT" | timelogger
"${BINDIR}/mastermirror-staging.sh"
# Keep the staging script's exit status for the abort check that follows.
rc=$?
timelog___end "STAGING FROM GIT" | timelogger
# $rc holds the exit status of the staging script; any failure aborts the run.
if (( rc != 0 )); then
	timelog___end "ENTIRE SCRIPT - ABORTED DUE TO CHECKOUT FAILURE" | timelogger
	exit 1
fi
# end 1)

# 1b) rsync EXPORTS/gentoo-x86 to STAGEDIR_repo_gentoo
# timestamp.chk is generated every 5 mins in datestamp.sh script in the FINALDIR_repo_gentoo
# timestamp.x is generated every 5 mins in datestamp.sh script in the STAGEDIR_repo_gentoo
# We want to track the progress of STAGEDIR_repo_gentoo->FINALDIR_repo_gentoo so exclude .x file here
# (so it isn't deleted)
# Keep /metadata/cache around so the --rsync switch of egencache will work as
# designed
timelog_start "STAGEDIR_repo_gentoo RSYNC" | timelogger
# RSYNC_GIT_EXCLUDE is a flat string in which the '*.old' pattern carries
# literal single-quote characters. Split it on whitespace into an array
# (read -a does no pathname expansion) and strip the quote characters, so that
# rsync receives the bare pattern instead of one that can never match.
squote="'"
read -r -a rsync_git_exclude <<<"${RSYNC_GIT_EXCLUDE}"
rsync_git_exclude=("${rsync_git_exclude[@]//${squote}/}")
# A failed sync leaves the script with rsync's own exit status.
rsync -Wqa --checksum --no-times \
	"${rsync_git_exclude[@]}" \
	--filter 'P /metadata/***' \
	--delete --delete-excluded \
	"${EXPORTS}/gentoo-x86/" "${STAGEDIR_repo_gentoo}/" || exit
timelog___end "STAGEDIR_repo_gentoo RSYNC" | timelogger
# end 1b)

# 1c) source (bash -n) eclasses to check for syntax errors
timelog_start "ECLASS CHECK" | timelogger
for i in "${STAGEDIR_repo_gentoo}"/eclass/*.eclass; do
	# bash -n only parses the file; nothing in the eclass is executed.
	bash -n "$i" || { echo "failed to source $i, exiting"; exit 1; }
done
timelog___end "ECLASS CHECK" | timelogger
# end 1c)

# 2) generate metadata (egencache)
# ${STAGEDIR_repo_gentoo}/metadata/cache is created automatically
export PORTAGE_USERNAME=gmirror PORTAGE_GRPNAME=gmirror
# One regen log per run, named after the UTC start minute.
LOG_TIMESTAMP=$(date -u '+%Y%m%d-%H%M')
REGEN_LOG_FILE=regen-run-${LOG_TIMESTAMP}.log
REGEN_LOG_DIR=${LOGDIR}/regen
[[ -d ${REGEN_LOG_DIR} ]] || mkdir "${REGEN_LOG_DIR}"

# TODO: Implement a monitoring system that guarantees low-latency human
# intervention when necessary, and stop using --tolerant (bug #239266).
timelog_start "REGEN" | timelogger
# Ensure layout.conf is set to thin-manifests so egencache doesn't error
sed -i \
	-e '/^thin-manifests/s,false,true,g' \
	-e '/^sign-manifests/s,true,false,g' \
	-e '/^sign-commits/s,false,true,g' \
	"${STAGEDIR_repo_gentoo}/metadata/layout.conf"

# All egencache output (stdout and stderr) is appended to this run's log file.
regen_log="${REGEN_LOG_DIR}/${REGEN_LOG_FILE}"

# for egencache, set user/group or make sure the user is in the portage group
# PARALLEL_PARAMS is left unquoted as in the original, presumably so that it
# can expand to several options or to nothing -- TODO confirm.
# The repositories configuration is an inline INI snippet; its lines must
# start in column 0.
# shellcheck disable=SC2086
GIT_DIR="${EXPORTS}/gentoo-x86.git/" \
egencache --update --rsync $PARALLEL_PARAMS \
	--tolerant --cache-dir="${BASE}/tmp/" \
	--repositories-configuration="
[gentoo]
location = ${STAGEDIR_repo_gentoo}
" \
	--update-use-local-desc \
	--repo=gentoo \
	>> "${regen_log}" 2>&1
rval=$?
timelog___end "REGEN" | timelogger

# On failure, dump the log to stdout and stop before anything is published.
if (( rval != 0 )); then
	echo "$0: something happened with egencache, cowardly refusing to continue"
	echo "${regen_log}:"
	cat "${regen_log}"
	exit 5
fi

# don't save empty files
if [[ ! -s ${regen_log} ]]; then
	rm -f -- "${regen_log}"
fi

# Keep 30 days of logs
find "${REGEN_LOG_DIR}" -type f -mtime +30 -exec rm -f -- {} +

# Force Manifests to thick, unsigned
# also disable commit signing for now.
# Rewrite layout.conf so that thin-manifests, sign-manifests and sign-commits
# all read "false".
sed -i \
	-e '/^thin-manifests/s,true,false,g' \
	-e '/^sign-manifests/s,true,false,g' \
	-e '/^sign-commits/s,true,false,g' \
	"${STAGEDIR_repo_gentoo}/metadata/layout.conf"

# Mark that metadata is done
date -u > "${STAGEDIR_repo_gentoo}/metadata/timestamp"
# end 2)

# Sync one exported tree into the staging area and stamp it.
#   $1 - label for the START/END time-log records
#   $2 - rsync source
#   $3 - rsync destination
#   $4 - directory that receives timestamp.chk (output of date -R -u)
# The rsync exit status is not checked, matching the inline steps this
# helper replaces.
function _stage_metadata_tree() {
	local label=$1 src=$2 dest=$3 stamp_dir=$4
	timelog_start "${label}" | timelogger
	rsync -Wqa --no-times --checksum --exclude=CVS --exclude=.git --delete \
		"${src}" "${dest}"
	date -R -u > "${stamp_dir}/timestamp.chk"
	timelog___end "${label}" | timelogger
}

# 3) place dtd info in STAGEDIR_repo_gentoo
_stage_metadata_tree "DTD" \
	"${EXPORTS}/dtd" \
	"${STAGEDIR_repo_gentoo}/metadata/" \
	"${STAGEDIR_repo_gentoo}/metadata/dtd"
# end 3)

# 3b) place xml schemas in STAGEDIR_repo_gentoo
_stage_metadata_tree "XML-SCHEMA" \
	"${EXPORTS}/xml-schema" \
	"${STAGEDIR_repo_gentoo}/metadata/" \
	"${STAGEDIR_repo_gentoo}/metadata/xml-schema"
# end 3b)

# 4) place glsa's in STAGEDIR_repo_gentoo
_stage_metadata_tree "GLSA" \
	"${EXPORTS}/glsa" \
	"${STAGEDIR_repo_gentoo}/metadata/" \
	"${STAGEDIR_repo_gentoo}/metadata/glsa"
# end 4)

# 5) place news in STAGEDIR_repo_gentoo
# The source is the *contents* of gentoo-news (trailing "/."), copied into
# metadata/news rather than into a gentoo-news subdirectory.
_stage_metadata_tree "NEWS" \
	"${EXPORTS}/gentoo-news/." \
	"${STAGEDIR_repo_gentoo}/metadata/news" \
	"${STAGEDIR_repo_gentoo}/metadata/news"
# end 5)

# 6) place projects.xml in STAGEDIR_repo_gentoo
timelog_start "HERDS-AND-PROJ" | timelogger
rsync -Wqa --no-times --checksum \
	"${EXPORTS}/projects/projects.xml" \
	"${STAGEDIR_repo_gentoo}/metadata/projects.xml"
timelog___end "HERDS-AND-PROJ" | timelogger
# end 6)

# 6a) thicken and generate MetaManifests
# The generator's exit status is not checked here; the verify step in 6b is
# the gate that aborts the run.
timelog_start "THICKEN-META" | timelogger
/usr/local/bin/gemato/utils/gen_fast_metamanifest.py \
	"${STAGEDIR_repo_gentoo}" "${SIGNKEYID}"
timelog___end "THICKEN-META" | timelogger
# end 6a)

# start 6b) Validate manifests
timelog_start "MANIFEST-VALIDATE" | timelogger
# Verifier output goes to a ".validate" file next to this run's regen log and
# is dumped to stdout only when verification fails.
validate_log="${REGEN_LOG_DIR}/${REGEN_LOG_FILE}.validate"
if ! /usr/local/bin/gemato/bin/gemato verify "${STAGEDIR_repo_gentoo}" >"${validate_log}" 2>&1; then
	echo "$0: A Manifest has a failure!"
	echo "${validate_log}:"
	cat "${validate_log}"
	exit 5
fi
timelog___end "MANIFEST-VALIDATE" | timelogger
# end 6b)

# 7) rsync from STAGEDIR_repo_gentoo to FINALDIR_repo_gentoo
# note, call exit above if it is not desired to update the FINALDIR_repo_gentoo. This is
# where all the rsync nodes pull from.
# timestamp.chk is generated every 5 mins in datestamp.sh script in the FINALDIR_repo_gentoo
# timestamp.x is generated every 5 mins in datestamp.sh script in the STAGEDIR_repo_gentoo
# We want to track the progress of STAGEDIR_repo_gentoo->FINALDIR_repo_gentoo so exclude .chk file here
# Checksum is needed because we explicitly set the timestamp of Manifests.
# exclude of .git/CVS is just in case we still accidentally created them.
timelog_start "FINAL RSYNC" | timelogger FINALDIR_repo_gentoo_tmp=${FINALDIR_repo_gentoo%/} mkdir -p "${FINALDIR_repo_gentoo_tmp}-1" "${FINALDIR_repo_gentoo_tmp}-2" $atomic_rsync -Wqa --exclude=/metadata/timestamp.chk --delete --checksum \ --exclude=.git --exclude=CVS --exclude=.gitignore \ --chmod=u-s,g-s --no-times \ ${STAGEDIR_repo_gentoo}/ \ ${FINALDIR_repo_gentoo}/ # idea: "max-delete" - require infra approval if wc -l of bak - FINALDIR_repo_gentoo is > # 500? # The previous method of "detecting" some failure was --max-delete=100, this # exposed breakage to users though. #if [[ XXXX ]]; then # echo "Something went wrong, putting backup in place from last sync" # mv ${FINALDIR_repo_gentoo} ${FINALDIR_repo_gentoo}.broken # mv ${FINALDIR_repo_gentoo}.bak ${FINALDIR_repo_gentoo} #fi # Testcase for mirrors without --checksum: # If a mirror is running WITHOUT --checksum # They will get the file the first time around, but not any later versions. # The date is deliberately set for just before this experiment started, at a # very round timestamp, works out to 'Sun Jan 17 03:06:40 UTC 2016'. ts=1453000000 f=$FINALDIR_repo_gentoo/metadata/.checksum-test-marker cat >$f <