xref: /aosp_15_r20/external/cldr/tools/scripts/cldr-svnprops-check.py (revision 912701f9769bb47905792267661f0baf2b85bed5)
1*912701f9SAndroid Build Coastguard Worker#! /usr/bin/python
2*912701f9SAndroid Build Coastguard Worker
3*912701f9SAndroid Build Coastguard Worker# Copyright (C) 2009-2012, International Business Machines Corporation, Google and Others.
4*912701f9SAndroid Build Coastguard Worker# All rights reserved.
5*912701f9SAndroid Build Coastguard Worker
6*912701f9SAndroid Build Coastguard Worker#
7*912701f9SAndroid Build Coastguard Worker#  Script to check and fix svn property settings for CLDR source files.
8*912701f9SAndroid Build Coastguard Worker#  This script is a modified version of ICU's icu-svnprops-check.py.
9*912701f9SAndroid Build Coastguard Worker#  Also check for the correct line endings on files with svn:eol-style = native
10*912701f9SAndroid Build Coastguard Worker#
11*912701f9SAndroid Build Coastguard Worker#  THIS SCRIPT DOES NOT WORK ON WINDOWS
12*912701f9SAndroid Build Coastguard Worker#     It only works correctly on platforms where the native line ending is a plain \n
13*912701f9SAndroid Build Coastguard Worker#
14*912701f9SAndroid Build Coastguard Worker#  usage:
15*912701f9SAndroid Build Coastguard Worker#     cldr-svnprops-check.py  [options]
16*912701f9SAndroid Build Coastguard Worker#
17*912701f9SAndroid Build Coastguard Worker#  options:
18*912701f9SAndroid Build Coastguard Worker#     -f | --fix     Fix any problems that are found
19*912701f9SAndroid Build Coastguard Worker#     -h | --help    Print a usage line and exit.
20*912701f9SAndroid Build Coastguard Worker#
21*912701f9SAndroid Build Coastguard Worker#  The tool operates recursively on the directory from which it is run.
22*912701f9SAndroid Build Coastguard Worker#  Only files from the svn repository are checked.
23*912701f9SAndroid Build Coastguard Worker#  No changes are made to the repository; only the working copy will be altered.
24*912701f9SAndroid Build Coastguard Worker
25*912701f9SAndroid Build Coastguard Workerimport sys
26*912701f9SAndroid Build Coastguard Workerimport os
27*912701f9SAndroid Build Coastguard Workerimport os.path
28*912701f9SAndroid Build Coastguard Workerimport re
29*912701f9SAndroid Build Coastguard Workerimport getopt
30*912701f9SAndroid Build Coastguard Worker
31*912701f9SAndroid Build Coastguard Worker#
32*912701f9SAndroid Build Coastguard Worker#  svn autoprops definitions.
33*912701f9SAndroid Build Coastguard Worker#      Copy and paste here the ICU recommended auto-props from
34*912701f9SAndroid Build Coastguard Worker#      http://icu-project.org/docs/subversion_howto/index.html
35*912701f9SAndroid Build Coastguard Worker#
#  This program will parse this autoprops string, and verify that files in
#  the repository have the recommended properties set.
38*912701f9SAndroid Build Coastguard Worker#
# The text below is runtime data: parse_auto_props() reads it line by line.
# Lines starting with '#' and the [auto-props] header are skipped by the parser.
# NOTE(review): the *.rtf and *.pdf entries name the property "mime-type"
# without the "svn:" prefix used by every other entry - confirm whether
# "svn:mime-type" was intended.
svn_auto_props = """
### Section for configuring automatic properties.
[auto-props]
### The format of the entries is:
###   file-name-pattern = propname[=value][;propname[=value]...]
### The file-name-pattern can contain wildcards (such as '*' and
### '?').  All entries which match will be applied to the file.
### Note that auto-props functionality must be enabled, which
### is typically done by setting the 'enable-auto-props' option.
*.c = svn:eol-style=native
*.cc = svn:eol-style=native
*.cpp = svn:eol-style=native
*.h = svn:eol-style=native
*.rc = svn:eol-style=native
*.dsp = svn:eol-style=native
*.dsw = svn:eol-style=native
*.sln = svn:eol-style=native
*.vcproj = svn:eol-style=native
configure = svn:eol-style=native;svn:executable
*.sh = svn:eol-style=native;svn:executable
*.pl = svn:eol-style=native;svn:executable
*.py = svn:eol-style=native;svn:executable
*.txt = svn:mime-type=text/plain;svn:eol-style=native
*.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
*.ucm = svn:eol-style=native
*.html = svn:eol-style=native;svn:mime-type=text/html
*.htm = svn:eol-style=native;svn:mime-type=text/html
*.xml = svn:eol-style=native
Makefile = svn:eol-style=native
*.in = svn:eol-style=native
*.mak = svn:eol-style=native
*.mk = svn:eol-style=native
*.png = svn:mime-type=image/png
*.jpeg = svn:mime-type=image/jpeg
*.jpg = svn:mime-type=image/jpeg
*.bin = svn:mime-type=application/octet-stream
*.brk = svn:mime-type=application/octet-stream
*.cnv = svn:mime-type=application/octet-stream
*.dat = svn:mime-type=application/octet-stream
*.icu = svn:mime-type=application/octet-stream
*.res = svn:mime-type=application/octet-stream
*.spp = svn:mime-type=application/octet-stream
# new additions 2007-dec-5 srl
*.rtf = mime-type=text/rtf
*.pdf = mime-type=application/pdf
# changed 2008-04-08: modified .txt, above, adding mime-type
# changed 2010-11-09: modified .java, adding mime-type
# Note: The escape syntax for semicolon (";;") is supported since subversion 1.6.1
"""
88*912701f9SAndroid Build Coastguard Worker
89*912701f9SAndroid Build Coastguard Worker
# file_types:  Parsed representation of the svn auto-props specification,
#              filled in by parse_auto_props().
#              One entry per file type (.cc, .cpp, .txt, ...), each a
#              (pattern, proplist) pair:
#                pattern  - regular expression string matching a file name
#                proplist - list of (property name, property value) pairs
file_types = []
97*912701f9SAndroid Build Coastguard Worker
def parse_auto_props():
    """Parse svn_auto_props and append the result to the global file_types.

    Each definition line has the form
        file-name-pattern = propname[=value][;propname[=value]...]
    and contributes one (regexp, proplist) tuple to file_types:
      regexp   - the file name pattern converted to a regular expression
                 string, e.g. "*.cpp" ==> ".*\\.cpp$"
      proplist - list of (property name, property value) tuples; a property
                 given without a value (e.g. svn:executable) gets "".

    Comment lines, blank lines and the [auto-props] header are skipped.
    Lines that do not start with "<pattern> =" are reported on stdout and
    skipped.
    """
    for propline in svn_auto_props.splitlines():
        if re.match(r"\s*(#.*)?$", propline):         # Match comment and blank lines
            continue
        if re.match(r"\s*\[auto-props\]", propline):  # Match the [auto-props] line.
            continue
        if not re.match(r"\s*[^\s]+\s*=", propline):  # minimal syntax check for <file-type> =
            print("Bad line from autoprops definitions: " + propline)
            continue
        file_type, string_proplist = propline.split("=", 1)

        # Transform the file type expression from autoprops into a normal
        # regular expression, e.g.  "*.cpp"  ==>  ".*\.cpp$"
        # '*' and '?' are the wildcards; every other character is escaped so
        # that it can only match itself.
        regex_parts = []
        for ch in file_type.strip():
            if ch == "*":
                regex_parts.append(".*")
            elif ch == "?":
                regex_parts.append(".")
            else:
                regex_parts.append(re.escape(ch))
        file_type = "".join(regex_parts) + "$"

        # example string_proplist at this point: " svn:eol-style=native;svn:executable"
        # split on ';' into a list of properties.  The negative lookahead and lookbehind
        # in the split regexp are to prevent matching on ';;', which is an escaped ';'
        # within a property value.
        proplist = []
        for prop in re.split(r"(?<!;);(?!;)", string_proplist):
            # Properties with no explicit value, e.g. svn:executable, have no
            # '=' and end up with an empty value.
            prop_name, _, prop_val = prop.partition("=")
            prop_name = prop_name.strip()
            if not prop_name:
                # Empty item, e.g. from a trailing ';' - not a property.
                continue
            # unescape any ";;" in a property value, e.g. the mime-type from
            #    *.java = svn:eol-style=native;svn:mime-type=text/plain;;charset=utf-8
            prop_val = prop_val.strip().replace(";;", ";")
            proplist.append((prop_name, prop_val))

        file_types.append((file_type, proplist))
138*912701f9SAndroid Build Coastguard Worker
139*912701f9SAndroid Build Coastguard Worker
def runCommand(cmd):
    """Run cmd through the shell and return everything it wrote to stdout.

    If the command does not succeed, a message is written to stderr and the
    script exits with the command's exit code (1 if the command did not exit
    normally).
    """
    output_file = os.popen(cmd)
    output_text = output_file.read()
    # close() returns None on success, otherwise the wait() style status.
    wait_status = output_file.close()
    if wait_status:
        sys.stderr.write('" %s " failed.  Exiting.\n' % cmd)
        # The wait status keeps the exit code in its high byte.  Handing it
        # to sys.exit() unchanged would be truncated to 8 bits by the OS,
        # turning e.g. status 256 (exit code 1) into a "successful" exit 0.
        if os.WIFEXITED(wait_status):
            exit_code = os.WEXITSTATUS(wait_status)
        else:
            exit_code = 1
        sys.exit(exit_code or 1)
    return output_text
148*912701f9SAndroid Build Coastguard Worker
149*912701f9SAndroid Build Coastguard Worker
def usage():
    """Print a one-line usage summary for this script to stdout."""
    # Single-argument print(...) behaves the same under Python 2 and Python 3.
    print("usage: " + sys.argv[0] + " [-f | --fix] [-h | --help]")
152*912701f9SAndroid Build Coastguard Worker
153*912701f9SAndroid Build Coastguard Worker
#
#  UTF-8 file check.   For text files, add a charset to the mime-type if their contents are UTF-8
#    file_name:        name of a text file.
#    base_mime_type:   svn:mime-type property value from the auto-props definitions.
#                      It may already carry a charset= part (the *.java entry does).
#    actual_mime_type: existing svn:mime-type property value for the file.
#    return:           svn:mime-type property value, with charset added when appropriate.
#
#  A warning is printed for files that are neither ASCII nor UTF-8, and a
#  notice is printed for UTF-8 files that start with a byte order mark.
#
def check_utf8(file_name, base_mime_type, actual_mime_type):

    # If the file already has a charset in its mime-type, don't make any change.
    if "charset=" in actual_mime_type:
        return actual_mime_type

    # Read raw bytes so the checks do not depend on any default text encoding.
    with open(file_name, 'rb') as f:
        data = f.read()

    # bytearray yields integers under both Python 2 and Python 3.
    if all(byte < 128 for byte in bytearray(data)):
        # pure ASCII (this includes empty files).
        return base_mime_type

    try:
        data.decode("UTF-8")
    except UnicodeDecodeError:
        print("warning: %s: not ASCII, not UTF-8" % file_name)
        return base_mime_type

    # Compare against the complete BOM: a lone leading 0xEF byte also starts
    # every other character in the range U+F000..U+FFFF.
    if data.startswith(b"\xef\xbb\xbf"):
        print("UTF-8 file with BOM: " + file_name)

    # Append charset=utf-8, unless the auto-props value already names a charset.
    if "charset=" in base_mime_type:
        return base_mime_type
    return base_mime_type + ';charset=utf-8'
188*912701f9SAndroid Build Coastguard Worker
189*912701f9SAndroid Build Coastguard Worker
def _shell_quote(arg):
    # Wrap arg in single quotes for the shell, escaping embedded single quotes.
    return "'" + arg.replace("'", "'\\''") + "'"


def _contains_cr(file_name):
    # Return True if the file holds at least one carriage return byte.
    with open(file_name, "rb") as f:
        return b"\r" in f.read()


def _remove_cr(file_name):
    # Rewrite the file in place with every carriage return byte removed.
    with open(file_name, "rb") as f:
        data = f.read()
    with open(file_name, "wb") as f:
        f.write(data.replace(b"\r", b""))


def main(argv):
    """Check, and optionally fix, the svn properties of the working copy files.

    argv is the command line without the program name.  Recognized options:
      -f | --fix     fix any problems that are found
      -h | --help    print a usage line and exit

    Every file listed by "svn ls -R" is compared with the auto-props
    definitions.  For each property that differs, the "svn propset" command
    that corrects it is printed, and executed when fixing is enabled.  Files
    with svn:eol-style=native are additionally checked for carriage returns.

    Exits with status 2 on a command line error.
    """
    fix_problems = False
    try:
        opts, args = getopt.getopt(argv, "fh", ("fix", "help"))
    except getopt.GetoptError as err:
        # Report getopt's own message; argv[0] is not necessarily the
        # argument that caused the error.
        print(str(err))
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        if opt in ("-f", "--fix"):
            fix_problems = True
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)      # a usage error must not look like success

    parse_auto_props()
    file_list = runCommand("svn ls -R ").splitlines()

    for f in file_list:
        if os.path.isdir(f):
            continue
        if not os.path.isfile(f):
            print("Repository file not in working copy: " + f)
            continue

        # auto-props patterns describe file names, not paths: match against
        # the last path component so that entries without a wildcard
        # ("configure", "Makefile") also apply inside subdirectories.
        base_name = os.path.basename(f)
        quoted_f = _shell_quote(f)

        for file_pattern, props in file_types:
            if not re.match(file_pattern, base_name):
                continue
            for propname, propval in props:
                actual_propval = runCommand("svn propget --strict " + propname + " " + quoted_f)
                if propname == "svn:mime-type" and propval.find("text/") == 0:
                    # check for UTF-8 text files, should have svn:mime-type=text/something; charset=utf8
                    propval = check_utf8(f, propval, actual_propval)
                # A property defined without a value (e.g. svn:executable) is
                # reported by svn as "*" when it is set.
                if not (propval == actual_propval or (propval == "" and actual_propval == "*")):
                    command = "svn propset %s %s %s" % (propname, _shell_quote(propval), quoted_f)
                    print(command)
                    if fix_problems:
                        os.system(command)
                if propname == "svn:eol-style" and propval == "native":
                    # Native line endings are a plain \n on the platforms this
                    # script supports, so any carriage return is a problem.
                    if _contains_cr(f):
                        if fix_problems:
                            print(f + ": Removing DOS CR characters.")
                            _remove_cr(f)
                        else:
                            print(f + " contains DOS CR characters.")
242*912701f9SAndroid Build Coastguard Worker
# Script entry point: run the check with the command line arguments,
# leaving out the program name.
if __name__ == "__main__":
    main(sys.argv[1:])
245