#!/bin/bash
# Copyright 2005-2015 Gentoo Foundation; Distributed under the GPL v2
# might be earlier copyright, no history available

# Steps performed by this script:
# 0) Make a backup in case there is something wrong with server side
#    (currently commented out)
# 1) Update rsync checkouts or exports from cvs.g.o::mastermirror-staging
#    - cvs.g.o::mastermirror-staging is populated by script mastermirror-staging.sh
# 1b) rsync EXPORTS/gentoo-x86 to STAGEDIR
# 1c) source (bash -n) eclasses to check for syntax errors
# 1d) import old CVS changelogs (temporary)
# 2) generate metadata (egencache)
# 3) place dtd info in STAGEDIR
# 4) place glsa's in STAGEDIR
# 5) place news in STAGEDIR
# 6) place herds.xml in STAGEDIR
# 7) rsync from STAGEDIR to FINALDIR
# 8) cache generated data (md5-cache, ChangeLogs)

umask 022
source /usr/local/bin/mastermirror/rsync-gen.vars

# Refuse to continue if the vars file did not provide the directories used
# below: with an empty value the "rsync --delete" calls would operate on
# paths directly under /.
: "${BASE:?must be set by rsync-gen.vars}"
: "${EXPORTS:?must be set by rsync-gen.vars}"
: "${STAGEDIR:?must be set by rsync-gen.vars}"
: "${FINALDIR:?must be set by rsync-gen.vars}"
: "${LOGDIR:?must be set by rsync-gen.vars}"

# We want to exclude many RCS files, but 'core' is actually a valid package
# name that we do use for dev-ml/core.
RSYNC_GIT_EXCLUDE="--include core/ --cvs-exclude --exclude=.gitignore"
#--filter='dir-merge /.cvsignore' --filter='exclude .cvsignore'
#--filter='dir-merge /.gitignore' --filter='exclude .gitignore'

# sanity checking
[[ -d "${EXPORTS}" ]] || mkdir "${EXPORTS}"
[[ -d "${STAGEDIR}" ]] || mkdir "${STAGEDIR}"
[[ -d "${FINALDIR}" ]] || mkdir "${FINALDIR}"
[[ -d "${LOGDIR}" ]] || mkdir "${LOGDIR}"
[[ -d "${BASE}/tmp" ]] || mkdir "${BASE}/tmp"

TIMESLOG="${LOGDIR}/rsync-gen-times.log"

echo "---------------------------------------------------------" >> "${TIMESLOG}"
echo "START ENTIRE SCRIPT $(date -u)" >> "${TIMESLOG}"

# 0) Make a backup in case there is something wrong with server side
#echo "START BACKUP $(date -u)" >> ${TIMESLOG}
#cp -al ${FINALDIR}/ ${FINALDIR}.bak/
#echo "END BACKUP $(date -u)" >> ${TIMESLOG}

# 1) Update rsync checkouts or exports from cvs.g.o::mastermirror-staging
PASSWD_FILE="/etc/mastermirror-fetch/gcvsd-rsync.rsync.passwd"
RSYNC="/usr/bin/rsync"
RSYNC_ARGS="--no-motd --recursive --times --links --port=60024 --password-file ${PASSWD_FILE} --ignore-errors --timeout=300 --checksum"
RSYNC_ARGS="${RSYNC_ARGS} --quiet"
RSYNC_ARGS_DELETE="--delete --delete-after --delete-excluded"
echo "START STAGING RSYNC $(date -u)" >> "${TIMESLOG}"
# RSYNC_ARGS and RSYNC_ARGS_DELETE hold several options each and are left
# unquoted on purpose so that they word-split into separate arguments.
# shellcheck disable=SC2086
${RSYNC} ${RSYNC_ARGS} ${RSYNC_ARGS_DELETE} git.gentoo.org::mastermirror-staging/ "${EXPORTS}/"
echo "END STAGING RSYNC $(date -u)" >> "${TIMESLOG}"
# end 1)

# 1b) rsync EXPORTS/gentoo-x86 to STAGEDIR
# timestamp.chk is generated every 5 mins in datestamp.sh script in the FINALDIR
# timestamp.x is generated every 5 mins in datestamp.sh script in the STAGEDIR
# We want to track the progress of STAGEDIR->FINALDIR so exclude .x file here
# (so it isn't deleted)
# Keep /metadata/cache around so the --rsync switch of egencache will work as
# designed
echo "START STAGEDIR RSYNC $(date -u)" >> "${TIMESLOG}"
# RSYNC_GIT_EXCLUDE is intentionally unquoted (multiple options).
# A failure here aborts the script with rsync's exit status.
# shellcheck disable=SC2086
rsync -Wqa \
  $RSYNC_GIT_EXCLUDE \
  --exclude=/metadata/timestamp.x \
  --exclude=/metadata/md5-cache \
  --exclude=ChangeLog \
  --delete \
  "${EXPORTS}/gentoo-x86/" "${STAGEDIR}/" || exit
echo "END STAGEDIR RSYNC $(date -u)" >> "${TIMESLOG}"
# end 1b)

# 1c) source (bash -n) eclasses to check for syntax errors
echo "START ECLASS CHECK $(date -u)" >> "${TIMESLOG}"
for i in "${STAGEDIR}"/eclass/*.eclass; do
  bash -n "$i" || { echo "failed to source $i, exiting"; exit 1; }
done
echo "END ECLASS CHECK $(date -u)" >> "${TIMESLOG}"
# end 1c)

# start 1d)
# Temporary: import old CVS changelogs
echo "START CHANGELOG-IMPORT $(date -u)" >> "${TIMESLOG}"
for file in "${EXPORTS}"/changelogs/*/*/ChangeLog*; do
  # When nothing matches, the glob is left as a literal string; skip it.
  [[ -e "${file}" || -L "${file}" ]] || continue
  # remove the prefix so we can copy (prefix quoted: literal, not a pattern)
  file=${file#"${EXPORTS}"/changelogs/}
  dir=${file%/*}
  # Only copy when the package directory still exists in STAGEDIR, i.e. skip
  # changelogs that no longer have an ebuild (which is why rsync is not used,
  # as it would create a directory/package when none should exist).
  if [[ -d "${STAGEDIR}/${dir}" ]]; then
    cp -a "${EXPORTS}/changelogs/${file}" "${STAGEDIR}/${file}"
  fi
done
echo "END CHANGELOG-IMPORT $(date -u)" >> "${TIMESLOG}"
# end 1d)

# 2) generate metadata (egencache)
# ${STAGEDIR}/metadata/cache is created automatically
export PORTAGE_USERNAME=gmirror PORTAGE_GRPNAME=gmirror
LOG_TIMESTAMP=$(date -u '+%Y%m%d-%H%M')
# Per-run log file for the regen step, kept under ${LOGDIR}/regen.
REGEN_LOG_FILE=regen-run-${LOG_TIMESTAMP}.log
REGEN_LOG_DIR=${LOGDIR}/regen
[[ -d "${REGEN_LOG_DIR}" ]] || mkdir "${REGEN_LOG_DIR}"

# Runs the thicken-manifests.py helper over STAGEDIR.
# Globals: PARALLEL_PARAMS_noloadavg (read, word-split on purpose),
#          STAGEDIR (read)
parallel_repoman_manifest_THICKEN() {
  # shellcheck disable=SC2086
  /usr/local/bin/mastermirror/thicken-manifests.py \
    ${PARALLEL_PARAMS_noloadavg} \
    "${STAGEDIR}"
}

# Runs "repoman manifest" via GNU parallel in every top-level directory of
# STAGEDIR except eclass, scripts, licenses, metadata and profiles; stdout is
# appended to the regen log.
# Globals: STAGEDIR, PARALLEL_PARAMS, REGEN_LOG_DIR, REGEN_LOG_FILE (read)
parallel_repoman_manifest_REPOMAN() {
  # PARALLEL_PARAMS is word-split on purpose; --load-average is rewritten to
  # the --load spelling for parallel.
  # shellcheck disable=SC2086
  find "${STAGEDIR}" \
    -maxdepth 1 -mindepth 1 \
    -type d \
    ! -name eclass \
    ! -name scripts \
    ! -name licenses \
    ! -name metadata \
    ! -name profiles \
    | parallel --no-notice \
      ${PARALLEL_PARAMS/--load-average/--load} \
      'cd {} && repoman manifest' \
      >>"${REGEN_LOG_DIR}/${REGEN_LOG_FILE}"
}

# TODO: Implement a monitoring system that guarantees low-latency human
# intervention when necessary, and stop using --tolerant (bug #239266).

echo "START REGEN $(date -u)" >> "${TIMESLOG}"

# Force Manifests to thick, unsigned
# also disable commit signing for now.
# TODO: add infra signing of Manifests
sed -i \
  -e '/^thin-manifests/s,true,false,g' \
  -e '/^sign-manifests/s,true,false,g' \
  -e '/^sign-commits/s,true,false,g' \
  "${STAGEDIR}/metadata/layout.conf"

parallel_repoman_manifest_REPOMAN

# Only update the changelogs every 6 hours
# because right now it takes a very long time to do it (exceeding 1 hour at times).
HOURS=$(date +%H)
# %H is zero-padded ("03", "09"); strip the padding, otherwise the
# single-digit patterns below can never match.
HOURS=${HOURS#0}
EGENCACHE_CHANGELOG=""
case "${HOURS}" in
  3|9|15|21)
    EGENCACHE_CHANGELOG="--update-changelogs --changelog-reversed --changelog-output ChangeLog"
    ;;
esac

# for egencache, set user/group or make sure the user is in the portage group
# (--update-changelogs is passed conditionally through EGENCACHE_CHANGELOG)
# PARALLEL_PARAMS and EGENCACHE_CHANGELOG are left unquoted on purpose: they
# hold zero or more options that must word-split.
# shellcheck disable=SC2086
GIT_DIR="${EXPORTS}/gentoo-x86/.git/" \
egencache --update --rsync $PARALLEL_PARAMS \
  --tolerant --cache-dir="${BASE}/tmp/" \
  --portdir="${STAGEDIR}" \
  --update-use-local-desc \
  --update-manifests --thin-manifests=n \
  --repo=gentoo \
  $EGENCACHE_CHANGELOG \
  >> "${REGEN_LOG_DIR}/${REGEN_LOG_FILE}" 2>&1
# Must directly follow egencache: its exit status is checked after the
# END REGEN timestamp is written.
rval=$?
echo "END REGEN $(date -u)" >> "${TIMESLOG}"
# rval holds the exit status of the egencache run above.
if [[ ${rval} != 0 ]]; then
  echo "$0: something happened with egencache, cowardly refusing to continue"
  echo "${REGEN_LOG_DIR}/${REGEN_LOG_FILE}:"
  cat "${REGEN_LOG_DIR}/${REGEN_LOG_FILE}"
  exit 5
fi

# Redo the Manifests, because the ChangeLog is not added to the Manifest for
# some reason.
parallel_repoman_manifest_REPOMAN

# don't save empty files
if [[ ! -s "${REGEN_LOG_DIR}/${REGEN_LOG_FILE}" ]]; then
  rm -- "${REGEN_LOG_DIR}/${REGEN_LOG_FILE}"
fi

# Keep 30 days of logs
find "${REGEN_LOG_DIR}" -type f -mtime +30 -print0 | xargs -0r rm

# Mark that metadata is done
date -u > "${STAGEDIR}/metadata/timestamp"

echo "START MANIFEST-VALIDATE $(date -u)" >> "${TIMESLOG}"
# Do not run the validation from the wrong directory if the cd fails.
cd "${STAGEDIR}" || { echo "$0: cannot cd to ${STAGEDIR}"; exit 1; }
# Redirect stdout to the file first and then point stderr at it, so that both
# streams land in the .validate log that is printed on failure.
PORTDIR="${STAGEDIR}" repoman manifest-check >"${REGEN_LOG_DIR}/${REGEN_LOG_FILE}.validate" 2>&1
rval=$?
if [[ ${rval} != 0 ]]; then
  echo "$0: A Manifest has a failure!"
  echo "${REGEN_LOG_DIR}/${REGEN_LOG_FILE}.validate:"
  cat "${REGEN_LOG_DIR}/${REGEN_LOG_FILE}.validate"
  exit 5
fi
echo "END MANIFEST-VALIDATE $(date -u)" >> "${TIMESLOG}"

# end 2)

# 3) place dtd info in STAGEDIR
echo "START DTD $(date -u)" >> "${TIMESLOG}"
rsync -Wqa --checksum --exclude=CVS --exclude=.git --delete "${EXPORTS}/dtd" "${STAGEDIR}/metadata/"
date -R -u > "${STAGEDIR}/metadata/dtd/timestamp.chk"
echo "END DTD $(date -u)" >> "${TIMESLOG}"
# end 3)

# 4) place glsa's in STAGEDIR
echo "START GLSA $(date -u)" >> "${TIMESLOG}"
rsync -Wqa --checksum --exclude=CVS --exclude=.git --delete "${EXPORTS}/glsa" "${STAGEDIR}/metadata/"
date -R -u > "${STAGEDIR}/metadata/glsa/timestamp.chk"
echo "END GLSA $(date -u)" >> "${TIMESLOG}"
# end 4)

# 5) place news in STAGEDIR
echo "START NEWS $(date -u)" >> "${TIMESLOG}"
rsync -Wqa --checksum --exclude=CVS --exclude=.git --delete "${EXPORTS}/gentoo-news/." "${STAGEDIR}/metadata/news"
date -R -u > "${STAGEDIR}/metadata/news/timestamp.chk"
echo "END NEWS $(date -u)" >> "${TIMESLOG}"
# end 5)

# 6) place herds.xml in STAGEDIR
echo "START HERDS $(date -u)" >> "${TIMESLOG}"
rsync -Wqa --checksum "${EXPORTS}/herds/herds.xml" "${STAGEDIR}/metadata/herds.xml"
echo "END HERDS $(date -u)" >> "${TIMESLOG}"
# end 6)

# 7) rsync from STAGEDIR to FINALDIR
# note, call exit above if it is not desired to update the FINALDIR. This is
# where all the rsync nodes pull from.

# timestamp.chk is generated every 5 mins in datestamp.sh script in the FINALDIR
# timestamp.x is generated every 5 mins in datestamp.sh script in the STAGEDIR
# We want to track the progress of STAGEDIR->FINALDIR so exclude .chk file here
# Checksum is needed because we explicitly set the timestamp of Manifests/ChangeLogs.
# exclude of .git/CVS is just in case we still accidentally created them.
echo "START FINAL RSYNC $(date -u)" >> "${TIMESLOG}"
# Pick the first executable candidate: atomic-rsync from PATH, then the copy
# under /usr/share/rsync. The MISSING sentinel is what remains if neither is
# executable. The command substitution is unquoted so that an empty result
# contributes no list entry.
# shellcheck disable=SC2046
for atomic_rsync in $(command -v atomic-rsync 2>/dev/null) /usr/share/rsync/atomic-rsync MISSING ; do
  [[ -x "${atomic_rsync}" ]] && break
done
if [[ "${atomic_rsync}" == "MISSING" ]]; then
  echo "$0: Cannot do final atomic rsync into place, atomic-rsync tool is missing"
  exit 1
fi
# FINALDIR without a trailing slash, used to name the "-1"/"-2" directories
# created next to it.
FINALDIR_tmp=${FINALDIR%/}
mkdir -p "${FINALDIR_tmp}-1" "${FINALDIR_tmp}-2"
"${atomic_rsync}" -Wqa --exclude=/metadata/timestamp.chk --delete --checksum \
  --exclude=.git --exclude=CVS \
  --chmod=u-s,g-s \
  "${STAGEDIR}/" \
  "${FINALDIR}/"

# idea: "max-delete" - require infra approval if wc -l of bak - finaldir is >
# 500?
# The previous method of "detecting" some failure was --max-delete=100, this
# exposed breakage to users though.
# Disabled: restore the backup from the last sync if the final rsync went wrong.
#if [[ XXXX ]]; then
#  echo "Something went wrong, putting backup in place from last sync"
#  mv ${FINALDIR} ${FINALDIR}.broken
#  mv ${FINALDIR}.bak ${FINALDIR}
#fi
echo "END FINAL RSYNC $(date -u)" >> "${TIMESLOG}"
# end 7)

# 8) cache generated data:
# md5-cache
# ChangeLog
echo "START CACHE RSYNC $(date -u)" >> "${TIMESLOG}"
# Abort rather than sync into paths directly under / when CACHEDIR is
# unset or empty.
CACHEDIR_md5cache="${CACHEDIR:?must be set}/metadata_md5-cache"
CACHEDIR_changelogs="${CACHEDIR}/changelogs"
mkdir -p "${CACHEDIR_md5cache}" "${CACHEDIR_changelogs}"
# MD5 cache
rsync -Wqatm --delete "${STAGEDIR}/metadata/md5-cache/" "${CACHEDIR_md5cache}/"
# ChangeLogs: we do NOT use --delete, as we want to keep old ChangeLogs
rsync -Wqatm --include 'ChangeLog' --include='*/' --exclude='*' "${STAGEDIR}/" "${CACHEDIR_changelogs}/"
echo "END CACHE RSYNC $(date -u)" >> "${TIMESLOG}"

echo "END ENTIRE SCRIPT $(date -u)" >> "${TIMESLOG}"