aboutsummaryrefslogtreecommitdiff
blob: bdedc5ea8b10e7baab2552fd87e04e9a0ad82d0b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
#!/bin/bash
# Copyright 2005-2015 Gentoo Foundation; Distributed under the GPL v2
# might be earlier copyright, no history available

# 0) Make a backup in case there is something wrong with server side
# 1) Update rsync checkouts or exports from cvs.g.o::mastermirror-staging
#    - cvs.g.o::mastermirror-staging is populated by script mastermirror-staging.sh
# 1b) rsync EXPORTS/gentoo-x86 to STAGEDIR_repo_gentoo
# 1c) source (bash -n) eclasses to check for syntax errors
# 2) generate metadata (egencache)
# 3) place dtd info & xml schemas in STAGEDIR_repo_gentoo
# 4) place glsa's in STAGEDIR_repo_gentoo
# 5) place news in STAGEDIR_repo_gentoo
# 6) place projects.xml in STAGEDIR_repo_gentoo
# 7) rsync from STAGEDIR_repo_gentoo to FINALDIR_repo_gentoo
# 8) cache generated data
# 9) rsync from STAGEDIR_repo_gentoo to REPODIR ### testing

################################################################################
# NOTE: You MUST run git-restore-mtime -m on the git checkouts hosted in
# git.gentoo.org::mastermirror-staging/ or this WILL NOT WORK.
# You can get git-restore-mtime here:
# https://github.com/MestreLion/git-tools/blob/master/git-restore-mtime
################################################################################

# Bootstrap: file-creation mask, site configuration, and derived settings.
umask 022
# The vars file is the only configuration loaded by this script. Refuse to
# continue without it instead of running the mkdir/rsync steps below with
# empty path variables.
if [[ ! -r /usr/local/bin/mastermirror/rsync-gen.vars ]]; then
	echo "$0: cannot read /usr/local/bin/mastermirror/rsync-gen.vars" >&2
	exit 1
fi
source /usr/local/bin/mastermirror/rsync-gen.vars
# Directory containing the helper scripts; defaults to the directory of this
# script unless BINDIR is already set and non-empty.
BINDIR=${BINDIR:-$(dirname -- "$0")}
# Key id used for signing, taken from SIGNKEYID_rsync.
SIGNKEYID=${SIGNKEYID_rsync}

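# timelog_start and timelog___end are named to keep the same width, so that
# paired START/END calls line up (NOTE(review): original comment was truncated; confirm intent).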
# Print one fixed-width timing line on stdout: "<tag> <what> <timestamp>".
# Arguments:
#   $1 - tag, left-aligned in 5 columns (callers pass START or END)
#   $2 - description of the phase, left-aligned in 22 columns
# Outputs:
#   one line; the timestamp is "@<epoch seconds>/<output of date -u>",
#   right-aligned in 40 columns.
function _log() {
	# Keep the working variables local so a direct (non-pipeline) call does
	# not overwrite globals of the same name.
	local logfmt_what_width=22
	local logfmt_time_width=40
	local ts
	ts="@$(date +%s)/$(date -u)"
	printf "%-5s %-${logfmt_what_width}s %${logfmt_time_width}s\n" "$1" "$2" "$ts"
}
# Emit the START line for the phase named in $1; formatting is done by _log.
timelog_start() {
	_log 'START' "${1}"
}
# Emit the END line for the phase named in $1; formatting is done by _log.
timelog___end() {
	_log 'END' "${1}"
}
# Print a separator rule on stdout: 70 dashes followed by a newline.
# Built with printf and parameter expansion so that no external interpreter
# has to be started just to emit a constant string.
function timelog_MARKER() {
	local rule
	# 70 blanks, then turn every blank into a dash.
	printf -v rule '%70s' ''
	printf '%s\n' "${rule// /-}"
}

# We want to exclude many RCS files, but we have some exceptions to the 'cvs-exclude' list:
# - 'core' is actually a valid package name that we do use for dev-ml/core.
# - '*.old' is used by 'net-p2p/freenet/files/freenet.old'
# This string is expanded UNQUOTED where rsync is invoked, so it must not
# contain shell quoting: quote characters stored inside a variable are not
# removed on expansion and would reach rsync as part of the pattern (e.g. the
# pattern '*.old' including the quotes, which matches nothing). Each option is
# therefore written as a single --opt=value word.
RSYNC_GIT_EXCLUDE="--include=core/ --include=*.old --cvs-exclude --exclude=.gitignore"
#--filter='dir-merge /.cvsignore' --filter='exclude .cvsignore'
#--filter='dir-merge /.gitignore' --filter='exclude .gitignore'

# sanity checking: create each working directory that does not exist yet.
# Plain mkdir (no -p), as before: a missing parent directory is reported as an
# error rather than being created. Paths are quoted so that a value containing
# whitespace is handled as one directory.
for required_dir in \
	"${EXPORTS}" \
	"${STAGEDIR_repo_gentoo}" \
	"${FINALDIR_repo_gentoo}" \
	"${REPODIR}" \
	"${LOGDIR}" \
	"${BASE}/tmp"
do
	[[ -d ${required_dir} ]] || mkdir -- "${required_dir}"
done
# File that timelogger appends the timing lines to.
TIMESLOG="${LOGDIR}/rsync-gen-times.log"
# Locate atomic-rsync: first whatever `which` finds on PATH, then the copy
# under /usr/share/rsync. The trailing literal MISSING is a sentinel that is
# left in $atomic_rsync when no candidate is executable.
for atomic_rsync in $(which atomic-rsync 2>/dev/null) /usr/share/rsync/atomic-rsync MISSING ; do
	if [ -x $atomic_rsync ]; then
		break
	fi
done
case "$atomic_rsync" in
	MISSING)
		echo "$0: Cannot do final atomic rsync into place, atomic-rsync tool is missing"
		exit 1
		;;
esac
# Locate gemato: first whatever `which` finds on PATH, then the copy under
# /usr/local/bin/gemato. The trailing literal MISSING is a sentinel that is
# left in $gemato when no candidate is executable.
for gemato in $(which gemato 2>/dev/null) /usr/local/bin/gemato/bin/gemato MISSING ; do
  [ -x "$gemato" ] && break
done
# Compare the exact value of $gemato: any padding inside the quotes would make
# the test unequal to the sentinel and a missing gemato would go unnoticed
# until the verify step.
if [ "$gemato" == "MISSING" ]; then
	echo "$0: Cannot verify metamanifests, gemato tool is missing (emerge gemato)"
	exit 1
fi
# Locate gen_fast_metamanifest.py in one of its two known install locations.
# The trailing literal MISSING is a sentinel that is left in the variable when
# neither candidate is executable.
for gemato_gen_fast_metamanifest in \
	/usr/share/gemato/gen_fast_metamanifest.py \
	/usr/local/bin/gemato/utils/gen_fast_metamanifest.py \
	MISSING
do
	if [ -x $gemato_gen_fast_metamanifest ]; then
		break
	fi
done
case "$gemato_gen_fast_metamanifest" in
	MISSING)
		echo "$0: Cannot generate metamanifests, gemato gen_fast_metamanifest.py tool is missing (emerge app-portage/gemato[tools,utils])"
		exit 1
		;;
esac

# Fan stdin out to two sinks: append it to the file named by $TIMESLOG and
# forward the same stream to logger (tag rsync-gen-times, priority INFO).
timelogger() {
	tee -a "${TIMESLOG}" \
		| logger -p INFO -t rsync-gen-times
}

# Remember when the run began (epoch seconds) and open this run's section in
# the timing log.
TIME_ENTIRE_START=$(date -u +%s)
timelog_MARKER | timelogger
timelog_start "ENTIRE SCRIPT" | timelogger

# Verify signing key is available: the key id has to appear in gpg's key
# listing, otherwise stop before doing any work.
signkey_listing=$(gpg --list-keys ${SIGNKEYID} | grep ${SIGNKEYID})
if [[ -z ${signkey_listing} ]]; then
	echo "${SIGNKEYID} not imported! exiting"
	exit 1
fi

# 0) Make a backup in case there is something wrong with server side
#echo "START	BACKUP		$(date -u)" | timelogger
#cp -al ${FINALDIR_repo_gentoo}/ ${FINALDIR_repo_gentoo}.bak/
#echo "END	BACKUP		$(date -u)" | timelogger

# 1) Update checkouts from git.g.o
RSYNC="/usr/bin/rsync"

# Run the staging helper and remember its exit status in rc, so that the END
# line is logged before deciding whether to abort.
timelog_start "STAGING FROM GIT" | timelogger
if $BINDIR/mastermirror-staging.sh; then
	rc=0
else
	rc=$?
fi
timelog___end "STAGING FROM GIT" | timelogger
if (( rc != 0 )); then
	timelog___end "ENTIRE SCRIPT - ABORTED DUE TO CHECKOUT FAILURE" | timelogger
	exit 1
fi
# end 1)

# 1b) rsync EXPORTS/gentoo-x86 to STAGEDIR_repo_gentoo

# timestamp.chk is generated every 5 mins in datestamp.sh script in the FINALDIR_repo_gentoo
# timestamp.x is generated every 5 mins in datestamp.sh script in the STAGEDIR_repo_gentoo
# We want to track the progress of STAGEDIR_repo_gentoo->FINALDIR_repo_gentoo so exclude .x file here
# (so it isn't deleted)
# Keep /metadata/cache around so the --rsync switch of egencache will work as
# designed
timelog_start "STAGEDIR_repo_gentoo RSYNC" | timelogger
# Option order matters to rsync: --no-times has to follow --archive, and the
# filter/include/exclude rules are evaluated in the order given.
# $RSYNC_GIT_EXCLUDE is deliberately left unquoted so that it splits into
# separate options. The 'P /metadata/***' rule protects everything under
# /metadata from --delete/--delete-excluded.
# A failing rsync ends the script with rsync's own exit status.
rsync --whole-file --quiet --archive --checksum --no-times \
	$RSYNC_GIT_EXCLUDE \
	--filter 'P /metadata/***' \
	--delete --delete-excluded \
	${EXPORTS}/gentoo-x86/ \
	${STAGEDIR_repo_gentoo}/ \
	|| exit $?
timelog___end "STAGEDIR_repo_gentoo RSYNC" | timelogger
# end 1b)

# The metadata tree is protected from deletion by the staging rsync, so
# potentially-stale Manifest files below it have to be removed separately
# (I couldn't get any R-rules to work).
find "${STAGEDIR_repo_gentoo}/metadata" \
	\( -name 'Manifest' -o -name 'Manifest.gz' -o -name 'Manifest.files.gz' \) \
	-delete

# 1c) parse (bash -n) every staged eclass to check for syntax errors.
# bash -n only reads and parses the file; nothing in it is executed.
# The first eclass that fails to parse stops the whole run with status 1.
# Paths are quoted so a staging directory containing whitespace still works.
timelog_start "ECLASS CHECK" | timelogger
for i in "${STAGEDIR_repo_gentoo}"/eclass/*.eclass; do
	bash -n "$i" || { echo "failed to source $i, exiting"; exit 1; }
done
timelog___end "ECLASS CHECK" | timelogger
# end 1c)

# 2) generate metadata (egencache)
# ${STAGEDIR_repo_gentoo}/metadata/cache is created automatically
# User/group names exported into the environment of the commands run below.
PORTAGE_USERNAME=gmirror
PORTAGE_GRPNAME=gmirror
export PORTAGE_USERNAME PORTAGE_GRPNAME
# One regen log per run, named after the UTC date and minute it started.
LOG_TIMESTAMP=$(date -u '+%Y%m%d-%H%M')
REGEN_LOG_DIR=${LOGDIR}/regen
REGEN_LOG_FILE=regen-run-${LOG_TIMESTAMP}.log
if [[ ! -d ${REGEN_LOG_DIR} ]]; then
	mkdir ${REGEN_LOG_DIR}
fi

# TODO: Implement a monitoring system that guarantees low-latency human
# intervention when necessary, and stop using --tolerant (bug #239266).

timelog_start "REGEN" | timelogger

# Ensure layout.conf is set to thin-manifests so egencache doesn't error.
# The staged layout.conf is edited in place; on the lines starting with each
# key the sed expressions rewrite:
#   thin-manifests: false -> true
#   sign-manifests: true  -> false
#   sign-commits:   false -> true
# (thin-manifests and sign-commits are switched to false again after the
# regen step further down in this script.)
sed -i \
	-e '/^thin-manifests/s,false,true,g' \
	-e '/^sign-manifests/s,true,false,g' \
	-e '/^sign-commits/s,false,true,g' \
	${STAGEDIR_repo_gentoo}/metadata/layout.conf

# for egencache, set user/group or make sure the user is in the portage group
# GIT_DIR is set for this one command only (prefix assignment).
# $PARALLEL_PARAMS is left unquoted so it can expand to several options
# (presumably defined in rsync-gen.vars; verify).
# The repository configuration is passed inline as a multi-line string that
# points the [gentoo] repo at the staging directory.
# stdout and stderr are appended to this run's regen log.
GIT_DIR=${EXPORTS}/gentoo-x86.git/ \
egencache --update --rsync $PARALLEL_PARAMS \
	--tolerant --cache-dir=${BASE}/tmp/ \
	--repositories-configuration="
[gentoo]
location = ${STAGEDIR_repo_gentoo}
" \
	--update-use-local-desc \
	--repo=gentoo \
	>> ${REGEN_LOG_DIR}/${REGEN_LOG_FILE} 2>&1
# Must be captured immediately: the next command would overwrite $?.
rval=$?
timelog___end "REGEN" | timelogger

# A failed egencache run stops everything: print its log and exit with
# status 5.
[[ ${rval} == 0 ]] || {
	echo "$0: something happened with egencache, cowardly refusing to continue"
	echo "${REGEN_LOG_DIR}/${REGEN_LOG_FILE}:"
	cat "${REGEN_LOG_DIR}/${REGEN_LOG_FILE}"
	exit 5
}

# don't save empty files
[[ -s ${REGEN_LOG_DIR}/${REGEN_LOG_FILE} ]] || rm ${REGEN_LOG_DIR}/${REGEN_LOG_FILE}

# Keep 30 days of logs
find ${REGEN_LOG_DIR} -type f -mtime +30 -exec rm {} +

# Force Manifests to thick, unsigned
# also disable commit signing for now.
# (each expression only touches the line starting with the named key)
sed -i \
	-e '/^thin-manifests/s,true,false,g' \
	-e '/^sign-manifests/s,true,false,g' \
	-e '/^sign-commits/s,true,false,g' \
	${STAGEDIR_repo_gentoo}/metadata/layout.conf

# Mark that metadata is done
date -u \
	> ${STAGEDIR_repo_gentoo}/metadata/timestamp
# end 2)

# Steps 3-6 copy auxiliary data from the exports into the staged metadata/
# directory. In every rsync call --archive has to come before --no-times.
# A source given without a trailing slash (dtd, xml-schema, glsa) is copied as
# a directory of that name into metadata/; the news source ends in "/." so its
# contents land directly in metadata/news.

# 3) place dtd info in STAGEDIR_repo_gentoo
timelog_start "DTD" | timelogger
rsync --whole-file --quiet --archive --no-times --checksum \
	--exclude=CVS --exclude=.git --delete \
	${EXPORTS}/dtd \
	${STAGEDIR_repo_gentoo}/metadata/
date -R -u \
	> ${STAGEDIR_repo_gentoo}/metadata/dtd/timestamp.chk
timelog___end "DTD" | timelogger
# end 3)

# 3b) place xml schemas in STAGEDIR_repo_gentoo
timelog_start "XML-SCHEMA" | timelogger
rsync --whole-file --quiet --archive --no-times --checksum \
	--exclude=CVS --exclude=.git --delete \
	${EXPORTS}/xml-schema \
	${STAGEDIR_repo_gentoo}/metadata/
date -R -u \
	> ${STAGEDIR_repo_gentoo}/metadata/xml-schema/timestamp.chk
timelog___end "XML-SCHEMA" | timelogger
# end 3b)

# 4) place glsa's in STAGEDIR_repo_gentoo
timelog_start "GLSA" | timelogger
rsync --whole-file --quiet --archive --no-times --checksum \
	--exclude=CVS --exclude=.git --delete \
	${EXPORTS}/glsa \
	${STAGEDIR_repo_gentoo}/metadata/
date -R -u \
	> ${STAGEDIR_repo_gentoo}/metadata/glsa/timestamp.chk
timelog___end "GLSA" | timelogger
# end 4)

# 5) place news in STAGEDIR_repo_gentoo
timelog_start "NEWS" | timelogger
rsync --whole-file --quiet --archive --no-times --checksum \
	--exclude=CVS --exclude=.git --delete \
	${EXPORTS}/gentoo-news/. \
	${STAGEDIR_repo_gentoo}/metadata/news
date -R -u \
	> ${STAGEDIR_repo_gentoo}/metadata/news/timestamp.chk
timelog___end "NEWS" | timelogger
# end 5)

# 6) place projects.xml in STAGEDIR_repo_gentoo
timelog_start "HERDS-AND-PROJ" | timelogger
rsync --whole-file --quiet --archive --no-times --checksum \
	${EXPORTS}/projects/projects.xml \
	${STAGEDIR_repo_gentoo}/metadata/projects.xml
timelog___end "HERDS-AND-PROJ" | timelogger
# end 6)

# 6a) thicken and generate MetaManifests
# The generator is called with the staging directory and the signing key id.
timelog_start "THICKEN-META" | timelogger
$gemato_gen_fast_metamanifest "${STAGEDIR_repo_gentoo}" "${SIGNKEYID}"
timelog___end "THICKEN-META" | timelogger
# end 6a)

# start 6b) Validate manifests
# gemato's output (stdout and stderr) goes to a .validate file next to this
# run's regen log; on failure that file is printed and the run stops with
# status 5, before anything is published.
timelog_start "MANIFEST-VALIDATE" | timelogger
validate_log=${REGEN_LOG_DIR}/${REGEN_LOG_FILE}.validate
$gemato verify "${STAGEDIR_repo_gentoo}" >${validate_log} 2>&1 || {
	echo "$0: A Manifest has a failure!"
	echo "${validate_log}:"
	cat "${validate_log}"
	exit 5
}
timelog___end "MANIFEST-VALIDATE" | timelogger
# end 6b)

# 7) rsync from STAGEDIR_repo_gentoo to FINALDIR_repo_gentoo
# note, call exit above if it is not desired to update the FINALDIR_repo_gentoo. This is
# where all the rsync nodes pull from.

# timestamp.chk is generated every 5 mins in datestamp.sh script in the FINALDIR_repo_gentoo
# timestamp.x is generated every 5 mins in datestamp.sh script in the STAGEDIR_repo_gentoo
# We want to track the progress of STAGEDIR_repo_gentoo->FINALDIR_repo_gentoo so exclude .chk file here
# Checksum is needed because we explicitly set the timestamp of Manifests.
# exclude of .git/CVS is just in case we still accidentally created them.
timelog_start "FINAL RSYNC" | timelogger
# Strip one trailing slash (if any) so the "-1"/"-2" suffixes attach to the
# directory name itself, then make sure both sibling directories exist.
# NOTE(review): presumably these are the two trees atomic-rsync alternates
# between -- confirm against the installed atomic-rsync.
FINALDIR_repo_gentoo_tmp=${FINALDIR_repo_gentoo%/}
mkdir -p "${FINALDIR_repo_gentoo_tmp}-1" "${FINALDIR_repo_gentoo_tmp}-2"
# The remaining arguments are rsync options; --no-times must stay after -a.
# The exit status of this command is not checked here.
$atomic_rsync -Wqa --exclude=/metadata/timestamp.chk --delete --checksum \
	--exclude=.git --exclude=CVS --exclude=.gitignore \
	--chmod=u-s,g-s --no-times \
	${STAGEDIR_repo_gentoo}/ \
	${FINALDIR_repo_gentoo}/

# idea: "max-delete" - require infra approval if wc -l of bak - FINALDIR_repo_gentoo is >
# 500?
# The previous method of "detecting" some failure was --max-delete=100, this
# exposed breakage to users though.
#if [[ XXXX ]]; then
#	echo "Something went wrong, putting backup in place from last sync"
#	mv ${FINALDIR_repo_gentoo} ${FINALDIR_repo_gentoo}.broken
#	mv ${FINALDIR_repo_gentoo}.bak ${FINALDIR_repo_gentoo}
#fi

# Testcase for mirrors without --checksum:
# If a mirror is running WITHOUT --checksum
# They will get the file the first time around, but not any later versions.
# The date is deliberately set for just before this experiment started, at a
# very round timestamp, works out to 'Sun Jan 17 03:06:40 UTC 2016'.
ts=1453000000
f=$FINALDIR_repo_gentoo/metadata/.checksum-test-marker
# The first line of the marker changes on every run (current epoch and UTC
# date); the explanatory text below it is fixed. The mtime is then pinned to
# $ts, so only the content differs between runs.
{
	echo "$(date +%s) $(date -u)"
	cat <<'EOF'
# Testcase for mirror network:
# Watch the value over a day, if unchanging then
# one or more mirrors is missing --checksum
# Please check bug #572168 for status (review for duplicates).
EOF
} >$f
touch -d @$ts $f
# End of mirror --checksum testcase

timelog___end "FINAL RSYNC" | timelogger
# end 7)

# 8) cache generated data:
# md5-cache
timelog_start "CACHE RSYNC" | timelogger
CACHEDIR_md5cache=${CACHEDIR}/metadata_md5-cache
mkdir -p $CACHEDIR_md5cache
# MD5 cache: mirror the staged md5-cache into the cache directory, keeping
# modification times, pruning empty directories and deleting entries that no
# longer exist in the staging tree.
rsync --whole-file --quiet --archive --times --prune-empty-dirs --delete \
	${STAGEDIR_repo_gentoo}/metadata/md5-cache/ \
	$CACHEDIR_md5cache/
timelog___end "CACHE RSYNC" | timelogger

# 9) rsync from STAGEDIR_repo_gentoo to REPODIR ### testing
# Mirror the staging tree into REPODIR and, only if that rsync succeeds,
# record the result as a git commit made from inside REPODIR (in a subshell,
# so the cd does not affect the rest of the script).
# - .git is excluded and --delete-excluded is not given, so rsync --delete
#   leaves ${REPODIR}/.git alone.
# - Author and committer identity are set through the environment for the
#   commit command only.
# - The commit message is read from /dev/fd/300, which is fed by the here-doc;
#   "<<-" strips the leading tabs of the message lines. X-Started is the epoch
#   value captured in TIME_ENTIRE_START, X-Finished is evaluated when the
#   here-doc is expanded.
# - The exit status of the rsync/add/commit chain is not checked afterwards.
timelog_start "GIT RSYNC" | timelogger
rsync -Wqa --exclude=/metadata/timestamp.chk --delete --checksum \
	--exclude=.git --exclude=CVS --exclude=.gitignore \
	--chmod=u-s,g-s \
	${STAGEDIR_repo_gentoo}/ \
	${REPODIR}/ && \
( cd $REPODIR && \
git add -A && \
GIT_AUTHOR_NAME='Gentoo Infrastructure' \
GIT_AUTHOR_EMAIL='infra@gentoo.org' \
GIT_COMMITTER_NAME='Gentoo Infrastructure' \
GIT_COMMITTER_EMAIL='infra@gentoo.org' \
git commit -a -q -F /dev/fd/300 300<<-EOF
	rsync-gen.sh done!

	X-Started: @${TIME_ENTIRE_START}
	X-Finished: @$(date -u +%s)
EOF
)
timelog___end "GIT RSYNC" | timelogger
# end 9)

# Close the timing-log section opened at the start of the run.
timelog___end "ENTIRE SCRIPT" | timelogger