[Unicode]   Unicode Localization Interoperability Technical Committee : Bug Tracking Home | Site Map | Search
 

source: trunk/abbrs/src/xls2json.py @ 16

Revision 16, 3.8 KB checked in by srloomis, 19 months ago (diff)

ticket:2 cook JSON, loading data from CLDR

Line 
1# -*- coding: utf-8 -*-
2
3#
4# need xlrd - use 'easy_install xlrd'  - see http://www.python-excel.org/
5
6import sys
7reload(sys)
8sys.setdefaultencoding("utf-8")
9
10from xlrd import open_workbook
11import json
12import os
13
dbg = False


def find_locales(xls_dir):
    """Return the sorted locale names that have a workbook in xls_dir.

    A locale name is the file name of a '*.xls' file with only that suffix
    removed, so '<xls_dir>/<name>.xls' always points back at the file that
    produced the name (dots inside the name are kept).

    Only the top level of xls_dir is examined: the conversion loop opens
    workbooks directly from that directory, so a workbook found in a
    subdirectory could never be opened.  A missing directory yields an
    empty list, as does a directory without workbooks.  A file called
    exactly '.xls' is skipped because it would give an empty locale name.
    """
    if not os.path.isdir(xls_dir):
        return []
    suffix = '.xls'
    found = []
    for name in os.listdir(xls_dir):
        stem = name[:-len(suffix)]
        if not name.endswith(suffix) or not stem:
            continue
        # Ignore directories that merely happen to be named like a workbook.
        if os.path.isfile(os.path.join(xls_dir, name)):
            found.append(stem)
    # Sorted so that the processing order does not depend on the file system.
    return sorted(found)


locs = find_locales('../xls')
27
28#testing
29#locs = ['ru']
30
# Header texts that identify the abbreviation column and the
# "is this an exception?" column; two spellings of each are recognised.
ENTRY_HEADERS = ('Entry example', 'Abbreviation')
EXCEPTION_HEADERS = ('isException', 'Exception?')


def find_columns(header):
    """Return (entry_col, exception_col) for a header row.

    Either index is -1 when no matching header text is present.  When
    several columns match, the right-most one wins.
    """
    entry_col = -1
    exception_col = -1
    for idx, title in enumerate(header):
        if title in ENTRY_HEADERS:
            entry_col = idx
        elif title in EXCEPTION_HEADERS:
            exception_col = idx
    return entry_col, exception_col


def parse_exception_flag(text):
    """Interpret a yes/no cell text as a boolean.

    Matching ignores case and surrounding whitespace.  Any other text is
    reported on stdout and treated as True.
    """
    flag = text.strip().lower()
    if flag == 'yes':
        return True
    if flag == 'no':
        return False
    print('Unknown true/false value %s' % (text))
    return True


# Convert every locale workbook ../xls/<loc>.xls into ../json/<loc>.json.
for loc in locs:
    print('Locale: %s' % (loc))
    exceptionEntries = set()
    nonExceptionEntries = set()
    wbfn = '../xls/%s.xls' % (loc)
    wb = open_workbook(wbfn)
    nrows = 0
    for s in wb.sheets():
        header = [s.cell(0, col).value for col in range(s.ncols)]

        # the header row counts
        nrows = nrows + 1

        # short sheet?
        if not header:
            continue
        # Format each cell with %s first: a numeric header cell would make
        # a plain join() raise TypeError.
        print(' Sheet Header: %s' % (','.join('%s' % (title,) for title in header)))

        # now, find the headers we want
        # Entry example,Full entry name,Example tested,isException,Note
        entryHeader, exceptionHeader = find_columns(header)
        if entryHeader == -1 or exceptionHeader == -1:
            print('   Skipping this sheet: could not find entryHeader and exceptionHeader in %s' % (loc))
            continue

        # Row 0 is the header; every following row is one entry.
        for row in range(1, s.nrows):
            nrows = nrows + 1
            entry = str(s.cell(row, entryHeader).value)
            isException = str(s.cell(row, exceptionHeader).value)
            if parse_exception_flag(isException):
                exceptionEntries.add(entry)
            else:
                nonExceptionEntries.add(entry)

    # unique (just for statistics) - union
    uniqueEntries = exceptionEntries | nonExceptionEntries
    # remaining:  the exceptions, MINUS those which are also non-exceptions.
    remainEntries = exceptionEntries - nonExceptionEntries

    print("Locale %s: %d rows processed, %d exception entries, %d nonexception (%d unique) - %d total usable" % (loc, nrows, len(exceptionEntries), len(nonExceptionEntries), len(uniqueEntries), len(remainEntries)))

    data = {'abbrs': sorted(remainEntries)}
    comment = "Generated from %s - %d rows processed, %d exception entries, %d nonexception (%d unique) - %d total usable" % (wbfn, nrows, len(exceptionEntries), len(nonExceptionEntries), len(uniqueEntries), len(remainEntries))
    jsonOut = {'about': {'id': loc, 'comment': comment}, 'data': data}
    fn = '../json/%s.json' % (loc)
    # 'with' closes the file even if serialisation fails, so no handle is
    # leaked per locale.
    with open(fn, 'wb') as f:
        f.write(json.dumps(jsonOut, sort_keys=True, indent=4))
        f.write('\n')
    print("*** Wrote %s with %d entries" % (fn, len(remainEntries)))
    print('')
119   
Note: See TracBrowser for help on using the repository browser.