[Unicode]   Unicode Localization Interoperability Technical Committee : Bug Tracking Home | Site Map | Search
 

source: trunk/abbrs/src/xls2txt.py @ 37

Revision 37, 2.3 KB checked in by srloomis, 10 months ago (diff)

ticket:7336: additional tool

  • Property svn:executable set to *
Line 
#!/usr/bin/python
# -*- coding: utf-8 -*-

# Create a '<workbook>-unique.txt' file from each xls named on the command
# line.  Each output file holds the sorted, de-duplicated, non-empty values
# of the entry column ('Entry example' or 'Abbreviation') collected from
# every sheet of that workbook, one value per line.
#
# Usage: xls2txt.py FILE.xls [FILE.xls ...]
#
# need xlrd - use 'easy_install xlrd'  - see http://www.python-excel.org/
#
# All print calls pass exactly one pre-formatted string, so they behave the
# same as a Python 2 print statement and as the Python 3 print function.

import sys

if sys.version_info[0] < 3:
    # Python 2 only: make UTF-8 the implicit str<->unicode codec, so str()
    # on non-ASCII cell text encodes as UTF-8 instead of raising
    # UnicodeEncodeError.  (reload() restores setdefaultencoding, which
    # site.py removes from the sys module at startup.)
    reload(sys)
    sys.setdefaultencoding("utf-8")

from xlrd import open_workbook
import json
import os

# Set to True to report every row skipped for having an empty entry cell.
dbg = False

# Not used by this script; retained so the module-level names are unchanged.
files = []

# Header captions accepted for the columns of interest.
# Entry example,Full entry name,Example tested,isException,Note
ENTRY_HEADERS = ('Entry example', 'Abbreviation')
EXCEPTION_HEADERS = ('isException', 'Exception?')

for wbfn in sys.argv[1:]:
    print('Opening File: %s' % (wbfn))
    entries = set()
    wb = open_workbook(wbfn)
    nrows = 0
    print('.. reading')
    for s in wb.sheets():
        # Row 0 of every sheet is treated as the header row.
        header = [s.cell(0, col).value for col in range(s.ncols)]

        # the header row counts
        nrows = nrows + 1

        # short sheet?
        if not header:
            continue

        # Format each caption on its own: cell values are not guaranteed to
        # be strings (numeric cells come back as numbers), and str.join()
        # raises TypeError on non-string items.
        print('.. Sheet Header: %s' % (','.join('%s' % (h,) for h in header)))

        # now, find the headers we want (if a caption repeats, the last
        # matching column wins)
        entryHeader = -1
        # The exception column is located but not consulted further below.
        exceptionHeader = -1

        for i, caption in enumerate(header):
            if caption in ENTRY_HEADERS:
                entryHeader = i
            elif caption in EXCEPTION_HEADERS:
                exceptionHeader = i

        if entryHeader == -1:
            # Identify the sheet by name; the previous code formatted an
            # undefined variable here and died with NameError instead of
            # skipping the sheet.
            print('   Skipping this sheet: could not find entryHeader and exceptionHeader in %s' % (s.name))
            continue

        #print("EntryHeader %d, exceptionHeader %d" % (entryHeader,exceptionHeader))

        for row in range(1, s.nrows):
            nrows = nrows + 1

            # Only the entry column is needed, so the other cells of the row
            # are not read.
            entry = str(s.cell(row, entryHeader).value).strip()

            if not entry:
                if dbg:
                    print("Skipping empty value on row %d" % row)
                continue

            entries.add(entry)

    print(".. Read %s: %d rows processed, %d entries unique. Writing.." % (wbfn, nrows, len(entries)))

    fn = '%s-unique.txt' % (wbfn)
    # 'with' closes the file even if a write fails part-way through.
    with open(fn, 'wb') as f:
        for entry in sorted(entries):
            # Binary mode keeps '\n' line endings on every platform.  On
            # Python 3 encode() produces the bytes the file requires; on
            # Python 2 the entry is already a UTF-8 byte string and, with the
            # default encoding set above, round-trips unchanged.
            f.write(('%s\n' % entry).encode('utf-8'))
    print(".. Wrote %s with %d entries" % (fn, len(entries)))
    print('')

Note: See TracBrowser for help on using the repository browser.