#!/bin/bash
#
# Origin: /aosp_15_r20/build/make/core/dupcheck.sh (revision 9e94795a3d4ef5c1d47486f9a02bb378756cea8a)
#
# Find duplicate shared libraries by md5 checksum and possible duplicates by size.
# Results will be available in the out directory of the build.
# Usage:
# ./dupcheck.sh <out_dir> <image>
#
# Reports written (both are recreated on every run):
#   <out_dir>/duplicate-libs-md5-<image>.txt
#       lib*.so files under ./<out_dir>/<image>/ that share both file name and md5.
#   <out_dir>/duplicate-libs-size-<image>.txt
#       *.so files that share file name and byte size but are not listed in the
#       md5 report. When exactly two such files remain, diffs of their notes
#       (llvm-readelf) and .text disassembly (llvm-objdump) are appended.
#
# Exit status: 1 on a wrong argument count or when no scratch directory can be created.
#
# NOTE(review): `md5sum` and `stat --format` are the GNU spellings. On a host without
# them the size pass discards the stat errors and would report no duplicates - confirm
# before relying on results from a non-Linux host.

# Only -u is enabled: several pipelines below legitimately end in a non-matching
# grep/awk, so -e / pipefail would abort on perfectly normal input.
set -u

# Check arguments
if [ "$#" -ne 2 ]; then
	echo "Usage: ./dupcheck.sh <out_dir> <image>"
	exit 1
fi

OUT_DIR="$1"
IMG="$2"
SEARCH_DIR="./${OUT_DIR}/${IMG}/"
MD5_DUPLICATES="${OUT_DIR}/duplicate-libs-md5-${IMG}.txt"
SIZE_DUPLICATES="${OUT_DIR}/duplicate-libs-size-${IMG}.txt"

# Scratch files live in a private directory so that runs for different images
# sharing one <out_dir> cannot clobber each other, and so that nothing is left
# behind (or picked up from an earlier aborted run) whatever the exit path.
TMP_DIR=$(mktemp -d "${OUT_DIR}/_dup_tmp.XXXXXX") || {
	echo "dupcheck.sh: cannot create a temporary directory in ${OUT_DIR}" >&2
	exit 1
}
trap 'rm -rf -- "${TMP_DIR}"' EXIT
TMP_MD5="${TMP_DIR}/md5"
TMP_SIZE="${TMP_DIR}/size"
TMP_CHECK="${TMP_DIR}/check"
TMP_SIZE_REAL="${TMP_DIR}/size_real"
TMP_FILE1="${TMP_DIR}/f1"
TMP_FILE2="${TMP_DIR}/f2"

# Check host and toolchain version
if [ "$(uname)" == "Linux" ]; then
	ARCH="linux-x86"
else
	ARCH="darwin-x86"
fi
BINUTILS_PATH="./prebuilts/clang/host/${ARCH}/llvm-binutils-stable"

# Reads `uniq -c` output on stdin and prints, without the count, every entry
# that occurred more than once. The count is compared as a field so that a
# checksum or size ending in "1", or a count such as 11, is handled correctly.
keep_repeated() {
	awk '$1 > 1 { sub(/^ *[0-9]+ /, ""); print }'
}

# Appends `diff -u $1 $2`, minus its three header lines, plus a blank line to
# the size report.
append_diff() {
	{
		diff -u "$1" "$2" | sed "1d;2d;3d"
		echo ""
	} >> "${SIZE_DUPLICATES}"
}

# Remove any old files if they exist.
rm -f -- "${MD5_DUPLICATES}" "${SIZE_DUPLICATES}"

# Find all .so files and calculate their md5.
# The sed collapses "<md5>  <dir>/<name>" to "<md5> <name>", so entries group
# by checksum and file name together.
find "${SEARCH_DIR}" -name "lib*.so" -type f -print0 \
	| xargs -0 md5sum \
	| sed -e "s# .*/# #" \
	| sort \
	| uniq -c \
	| sort -n \
	| keep_repeated > "${TMP_MD5}"

if [ -s "${TMP_MD5}" ]; then
	while read -r checksum filename; do
		# For each md5, list the file paths that match.
		{
			echo "MD5: ${checksum}"
			# Anchor on the checksum column, then drop "<md5>" and the
			# two-character separator to leave just the path.
			find "${SEARCH_DIR}" -name "${filename}" -type f -print0 \
				| xargs -0 md5sum \
				| grep "^${checksum} " \
				| sed 's/^[^ ]* .//'
			echo ""
		} >> "${MD5_DUPLICATES}"
	done < "${TMP_MD5}"
else
	echo "No duplicate files by md5 found." >> "${MD5_DUPLICATES}"
fi

# Find possible duplicate .so files by size.
# stat errors are discarded on purpose, as in the md5-independent best-effort scan.
find "${SEARCH_DIR}" -name "*.so" -type f -print0 \
	| xargs -0 stat --format="%s %n" 2>/dev/null \
	| sed -e "s# .*/# #" \
	| sort \
	| uniq -c \
	| sort -n \
	| keep_repeated > "${TMP_SIZE}"

if [ -s "${TMP_SIZE}" ]; then
	while read -r size filename; do
		# Start every candidate group with an empty result list.
		: > "${TMP_SIZE_REAL}"
		# Collect the paths with this name whose size field is exactly ${size}
		# (string comparison on the first field, never a substring match).
		find "${SEARCH_DIR}" -name "${filename}" -type f -print0 \
			| xargs -0 stat --format="%s %n" 2>/dev/null \
			| awk -v size="${size}" '($1 "") == (size "") { sub(/^[^ ]+ /, ""); print }' \
			| sort > "${TMP_CHECK}"
		# Check if the files are not in the md5sum list and do nothing if that is the case.
		while IFS= read -r filepath; do
			# Whole-line match: a path must not match merely because it is
			# contained in a longer line of the md5 report.
			if ! grep -qxF -- "${filepath}" "${MD5_DUPLICATES}"; then
				echo "${filepath}" >> "${TMP_SIZE_REAL}"
			fi
		done < "${TMP_CHECK}"
		# For every duplication found, diff the .note and .text sections.
		if [ -s "${TMP_SIZE_REAL}" ]; then
			{
				echo "File: ${filename}, Size: ${size}"
				cat "${TMP_SIZE_REAL}"
				echo ""
			} >> "${SIZE_DUPLICATES}"
			# Reading from stdin keeps the file name out of wc's output.
			count=$(wc -l < "${TMP_SIZE_REAL}")
			# Limitation: this only works for file pairs. If more than two possible duplications are found, the user needs to check
			# manually all the possible combinations using the llvm-readelf and llvm-objdump commands below.
			if [ "${count}" -eq 2 ]; then
				file1=$(head -n 1 "${TMP_SIZE_REAL}")
				file2=$(tail -n 1 "${TMP_SIZE_REAL}")
				# Check .note section
				"${BINUTILS_PATH}/llvm-readelf" --wide --notes "${file1}" > "${TMP_FILE1}" 2>&1
				"${BINUTILS_PATH}/llvm-readelf" --wide --notes "${file2}" > "${TMP_FILE2}" 2>&1
				append_diff "${TMP_FILE1}" "${TMP_FILE2}"
				# Check .text section (the first two lines of the objdump output are dropped before diffing)
				"${BINUTILS_PATH}/llvm-objdump" --line-numbers --disassemble --demangle --reloc --no-show-raw-insn --section=.text "${file1}" \
					| sed "1d;2d" > "${TMP_FILE1}"
				"${BINUTILS_PATH}/llvm-objdump" --line-numbers --disassemble --demangle --reloc --no-show-raw-insn --section=.text "${file2}" \
					| sed "1d;2d" > "${TMP_FILE2}"
				append_diff "${TMP_FILE1}" "${TMP_FILE2}"
			else
				# Reached for any count other than two, including a single leftover file.
				echo "*Note: more than one duplicate. Manually verify all possible combinations." >> "${SIZE_DUPLICATES}"
			fi
			echo "" >> "${SIZE_DUPLICATES}"
		fi
	done < "${TMP_SIZE}"
	# Every size candidate was already covered by the md5 report: still leave a
	# size report behind so that both result files always exist.
	if [ ! -e "${SIZE_DUPLICATES}" ]; then
		echo "No duplicate files by size found." >> "${SIZE_DUPLICATES}"
	fi
else
	echo "No duplicate files by size found." >> "${SIZE_DUPLICATES}"
fi
119