aboutsummaryrefslogtreecommitdiff
blob: ccff2d677623fafadf3cbb144136709659302d0f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/bin/bash
# Copyright 2014 Gentoo Foundation
# Distributed under the terms of the GNU GPL version 2 or later
# Author: Ulrich Müller <ulm@gentoo.org>

shopt -s extglob

portdir=$(portageq get_repo_path / gentoo)
cd "${portdir}" || exit 1

find . \( -path ./distfiles -o -path ./local -o -path ./metadata \
    -o -path ./packages \) -prune \
    -o ! -type d -exec file -ih '{}' + \
| while read line; do
    path=${line%:*}
    type=${line##*:*( )}
    case ${type} in
        "application/octet-stream; charset=binary" \
        | "application/octet-stream; charset=unknown" \
        | "binary; charset=binary")
            # GNU Info files (or patches to them) can contain the following
            # control characters that produce false positives:
            # - 0x1f, followed by LF or FF
            # - 0x7f (DEL), preceded by "Node:" in the same line
            # Filter such characters and reiterate
            line=$(sed -e 's/\x1f\f\?$//;/Node:/s/\x7f//' "${path}" | file -i -)
            type=${line##*:*( )}
            ;;
    esac
    case ${type} in
        text/*) ;;                            # text file
        application/*"; charset=us-ascii") ;;
        application/*"; charset=utf-8") ;;
        "image/svg+xml; charset=us-ascii") ;; # SVG image
        "image/x-xpmi; charset=us-ascii") ;;  # XPM image
        *)
            size=$(stat -c "%s" "${path}")
            echo "${path#./}: ${type} (size=${size})"
            ;;
    esac
done \
| sort