[Unicode]   Unicode Localization Interoperability Technical Committee : Bug Tracking Home | Site Map | Search
 

source: trunk/abbrs/src/xls2json.py @ 33

Revision 33, 4.1 KB checked in by srloomis, 16 months ago (diff)

ticket:7336: add XML format and update xls2json

Line 
1# -*- coding: utf-8 -*-
2
3#
4# need xlrd - use 'easy_install xlrd'  - see http://www.python-excel.org/
5
6import sys
7reload(sys)
8sys.setdefaultencoding("utf-8")
9
10from xlrd import open_workbook
11import json
12import os
13
# Set to True to report rows that are skipped because a needed cell is empty.
dbg = False

# NOTE: every print below is a single parenthesised expression, which produces
# the same output as the Python 2 print statement and also parses on Python 3.


def find_locales(xls_dir):
    """Return the sorted, de-duplicated locale names found in *xls_dir*.

    A locale name is the file name of a ``*.xls`` file with the ``.xls``
    suffix removed (so ``en.US.xls`` yields ``en.US``).  Only files directly
    inside *xls_dir* are considered, because workbooks are later opened as
    ``<xls_dir>/<locale>.xls``; files in subdirectories (including ``.svn``)
    could never be opened under that name.

    A missing directory yields an empty list.
    """
    suffix = '.xls'
    found = set()
    # os.walk() yields nothing for a missing directory; its first item
    # describes the top-level directory, which is the only one wanted.
    for _path, _dirs, names in os.walk(xls_dir):
        for name in names:
            stem = name[:-len(suffix)]
            if name.endswith(suffix) and stem:
                found.add(stem)
        break
    return sorted(found)


def find_columns(header):
    """Return ``(entry_col, exception_col)`` for a sheet's header row.

    The entry column is titled 'Entry example' or 'Abbreviation'; the
    exception column is titled 'isException' or 'Exception?'.  A column that
    is not present is reported as -1.  If a title occurs more than once the
    last occurrence wins.
    """
    entry_col = -1
    exception_col = -1
    for idx, title in enumerate(header):
        if title in ('Entry example', 'Abbreviation'):
            entry_col = idx
        elif title in ('isException', 'Exception?'):
            exception_col = idx
    return entry_col, exception_col


def parse_exception_flag(text):
    """Map a yes/no cell to True/False, ignoring case; None if unrecognised."""
    lowered = text.lower()
    if lowered == 'yes':
        return True
    if lowered == 'no':
        return False
    return None


def process_sheet(sheet, loc, exception_entries, non_exception_entries):
    """Add one sheet's entries to the two sets and return the rows counted.

    *sheet* must provide ``nrows``, ``ncols`` and ``cell(row, col).value``.
    *loc* is only used in the "skipping" message.  Entries flagged as
    exceptions are added to *exception_entries*, the others to
    *non_exception_entries*.  Rows whose entry or flag cell is empty are
    counted but otherwise ignored.  An unrecognised flag is reported and the
    entry is treated as an exception.

    The returned count includes one for the header row, even when the sheet
    is empty or is skipped for lacking the needed columns.
    """
    header = [sheet.cell(0, col).value for col in range(sheet.ncols)]

    # the header row counts
    counted = 1

    # short sheet?
    if not header:
        return counted

    # Format each title with %s so a non-text header cell cannot break join().
    print(' Sheet Header: %s' % (','.join('%s' % (title,) for title in header)))

    entry_col, exception_col = find_columns(header)
    if entry_col == -1 or exception_col == -1:
        print('   Skipping this sheet: could not find entryHeader and exceptionHeader in %s' % (loc,))
        return counted

    for row in range(1, sheet.nrows):
        counted += 1

        entry = str(sheet.cell(row, entry_col).value).strip()
        if not entry:
            if dbg:
                print("Skipping empty value on row %d" % row)
            continue

        flag_text = str(sheet.cell(row, exception_col).value).strip()
        if not flag_text:
            if dbg:
                print("Skipping empty isException value on row %d" % row)
            continue

        is_exception = parse_exception_flag(flag_text)
        if is_exception is None:
            print('Row %d: Unknown true/false value "%s"' % (row, flag_text))
            is_exception = True

        if is_exception:
            exception_entries.add(entry)
        else:
            non_exception_entries.add(entry)

    return counted


locs = find_locales('../xls')

#testing
#locs = ['ru']

for loc in locs:
    print('Locale: %s' % (loc,))
    exceptionEntries = set()
    nonExceptionEntries = set()
    wbfn = '../xls/%s.xls' % (loc,)
    wb = open_workbook(wbfn)

    nrows = 0
    for s in wb.sheets():
        nrows += process_sheet(s, loc, exceptionEntries, nonExceptionEntries)

    # unique (just for statistics) - union
    uniqueEntries = exceptionEntries | nonExceptionEntries
    # remaining:  the exceptions, MINUS those which are also non-exceptions.
    remainEntries = exceptionEntries - nonExceptionEntries

    # The same statistics go to the console and into the JSON 'comment'.
    summary = "%d rows processed, %d exception entries, %d nonexception (%d unique) - %d total usable" % (
        nrows, len(exceptionEntries), len(nonExceptionEntries),
        len(uniqueEntries), len(remainEntries))
    print("Locale %s: %s" % (loc, summary))

    jsonOut = {
        'about': {
            'id': loc,
            'comment': "Generated from %s - %s" % (wbfn, summary),
        },
        'data': {'abbrs': sorted(remainEntries)},
    }
    payload = json.dumps(jsonOut, sort_keys=True, indent=4)

    fn = '../json/%s.json' % (loc,)
    # 'with' closes the file even if the write fails.  The file is opened in
    # binary mode, so the text is encoded explicitly before writing.
    with open(fn, 'wb') as out:
        out.write(payload.encode('utf-8') + b'\n')

    print("*** Wrote %s with %d entries" % (fn, len(remainEntries)))
    print('')
131   
Note: See TracBrowser for help on using the repository browser.