-----Original Message-----
From: Ehud Karni [mailto:[EMAIL PROTECTED]
> On Sun, 18 Apr 2004 21:17:18 +0300 (IDT), Geoffrey S. Mendelson <[EMAIL PROTECTED]>
> > wrote:
> >
> > Enclosed is a perl script I wrote (in simple easy to follow code) that
> > reads a file exported by morotola phone tools, and converts it. The
> > actual conversion is done in a subroutine that can be used elsewhere.
>
> Below is an Emacs command that will find current cellular phone numbers
> and possibly (with user approval) will replace it.
Below is a Python script that is multi-functional.
Usage:
To convert a single number:
./CellNum.py num=055-123456
To convert a Nokia Content Copier 1.3 file (part of Nokia Data
Suite 4.88 and maybe others) use (supply only the PhoneBook.ncc
and Calendar.ncc files):
./CellNum.py nokiafile=PhoneBook.ncc
To convert an excel spreadsheet by scanning the entire spreadsheet
and changing everything that looks like a phone number (WARNING
Win32 platform only):
CellNum.py excel=c:\temp\file.xls
For all conversions, you can add the keyword 'i18n' anywhere on
the command line and if the number was in the local notation
(055-123456) it will be converted to an international notation
(+97255-123456):
./CellNum.py num="055 123-456" i18n
The script recognizes the following formats:
+97255123456
+972055123456
97255123456
972055123456
01197255123456
055123456
55123456
and it tries to maintain formatting as much as possible, so
if you write:
./CellNum.py num="+972 (55) 123-456"
you will get:
+972 (54) 512-3456
If you wish, you can extend it for more file formats (I'll
leave CSV files as an excercise to the reader).
You can also use the convertion function in your own Python
scripts. The API is simple - it takes a string and returns a
string. If the conversion fails or is unnecessary it returns
the original string. If an optional 2nd parameter is True, i18n
conversion is performed.
import CellNum
...
sNum="055-123456"
print NumConvert(sNum,True)
-- Arik
--cut here--
#!/usr/bin/python
#
# Israeli cellular smart numbering system convertion
#
import os
import time
import codecs
def NumConvert(sNumber,bI18N=False):
"""
Detect the different parts of an Israeli phone number
Convert the number to the new system
Prefixes an international prefix (+972) if it doesn't exist (optional)
Retains as much as possible from the original format of the number
"""
dAddedDigit = { u'50':(u'50',u'5'), u'51':(u'50',u'7'), u'52':(u'52',u'2'),
u'53':(u'52',u'3'), u'54':(u'54',u'4'), u'55':(u'54',u'5'),
u'56':(u'50',u'6'), u'57':(u'57',u'7'), u'58':(u'52',u'8'),
u'64':(u'52',u'4'), u'65':(u'52',u'5'),
u'66':(u'54',u'6'), u'67':(u'54',u'7'), u'68':(u'50',u'8') }
sNumDigits = u'+1234567890'
sCellPrefix = u'56'
sRealNum=u''
lOrig=[]
# scan the original number, extract digits and link back to original position
for i in range(len(sNumber)):
if sNumber[i] in sNumDigits:
sRealNum+=sNumber[i]
lOrig.append(i)
# sanity
if len(sRealNum)<8:
return sNumber
# already internationalized?
bAlreadyI18N=True
bHasZero=True
# check for intel prefix like +972-55-987617
if sRealNum[0:4]==u'+972':
nPrefixStart=4
# fix misguided people who write '+972 (055) 987617'
if sRealNum[4]==u'0':
nPrefixStart=5
# check for intel prefix w/o the plus, like 972.55.987617
elif sRealNum[0:3]==u'972':
nPrefixStart=3
# fix misguided people who write '972.055.987617'
if sRealNum[3]==u'0':
nPrefixStart=4
# check for dialing from the US, like 011 (972) 55-987617
elif sRealNum[0:6]==u'011972':
nPrefixStart=6
# check for local prefix, like 055-987617
elif sRealNum[0]==u'0':
nPrefixStart=1
bAlreadyI18N=False
# check for poorly formatted numbers of the form (55) 987 617
elif len(sRealNum)==8:
nPrefixStart=0
bAlreadyI18N=False
bHasZero=False
# else unknown format
else:
return sNumber
# check for cellular
if not sRealNum[nPrefixStart] in sCellPrefix:
return sNumber
# extract the prefix component
sPrefix=sRealNum[nPrefixStart:nPrefixStart+2]
# verify that it's a cell number due for change
if not sPrefix in dAddedDigit:
return sNumber
# extract the suffix
nSuffixStart=nPrefixStart+2
sSuffix=sRealNum[nSuffixStart:]
# verify old length
if not len(sSuffix)==6:
return sNumber
# It's eligable for change!
sNewPrefix,sNewDigit = dAddedDigit[sPrefix]
## build the new number
sNewNumber=u''
nStartChar=0
# I18N?
if bI18N and not bAlreadyI18N:
sNewNumber=u'+972'
# if the first character was not a digit, add a space for better formatting
if lOrig[0]!=0:
sNewNumber+=u' '
# if the original number has a zero, move past it, preserving whatever we can
if bHasZero:
sNewNumber+=sNumber[:lOrig[0]]
nStartChar=lOrig[0]+1
# add stuff before the prefix starts
sNewNumber += sNumber[nStartChar:lOrig[nPrefixStart]]
# add the new prefix, taking care not to omit stuff between the digits, and
# everything up to the suffix start
sNewNumber += sNewPrefix[0] + \
sNumber[lOrig[nPrefixStart]+1:lOrig[nPrefixStart+1]] + \
sNewPrefix[1] + \
sNumber[lOrig[nPrefixStart+1]+1:lOrig[nSuffixStart]]
# add the new 1st digit and digit 2 and 3 (were 1 and 2 in the old system)
sNewNumber += sNewDigit + sNumber[lOrig[nSuffixStart]:lOrig[nSuffixStart+2]]
# if there's a delimiter between digits 3 and 4 of the prefix (012 345-678)
# and no delimiter between digits 2 and 3,
# now it's the time to put it, between the new 3 and 4 digits (012 234-5678)
# and then comes the 4th digit (previous 3rd digit)
# and then the rest of it
if (lOrig[nSuffixStart+2]+1<lOrig[nSuffixStart+3]) and \
(lOrig[nSuffixStart+1]+1==lOrig[nSuffixStart+2]):
sNewNumber += sNumber[lOrig[nSuffixStart+2]+1:lOrig[nSuffixStart+3]] + \
sNumber[lOrig[nSuffixStart+2]] + \
sNumber[lOrig[nSuffixStart+3]:]
else:
sNewNumber += sNumber[lOrig[nSuffixStart+2]:]
return sNewNumber
def unicode_readline(f,nBufferSize=4096):
"""
Generator function. Returns lines from the input file f,
ending on a utf-16 unicode end-of-line.
assumption: lines will never be smaller than nBufferSize
"""
sCRLF='\r\n'.encode('utf-16le')
sBOM='fffe'.decode('hex')
sBuf=f.read(nBufferSize)
nBufSize=len(sBuf)
# get rid of the pesky BOM
if sBuf[:2]==sBOM:
sBuf=sBuf[2:]
# search for a unicode \n, on even offsets
nStart=0
while True:
nNewLine=sBuf.find(sCRLF,nStart)
if nNewLine<0:
# didn't find any, read another chunk from the file
sBuf+=f.read(nBufferSize)
if len(sBuf)==nBufSize:
# no new input - the end of file is nigh
# dump out whatever's in there and exit
if len(sBuf):
yield sBuf.decode('utf-16le')+'\r\n'
return
nBufSize=len(sBuf)
if nNewLine % 2 == 0:
# it's even!
yield sBuf[:nNewLine].decode('utf-16le')+'\r\n'
sBuf=sBuf[nNewLine+len(sCRLF):]
nStart=0
else:
nStart=nNewLine+1
def ProcessNokiaFile(sFileName,bI18N=False):
"convert numbers within nokia calendar and phonebook files"
tConvertArgs = (u'209',u'210',u'211',u'212',u'213',u'219',u'208',u'1442')
tConvertLines = (u'200',u'225',u'1440')
# rename the file creating a backup copy
sOrigFileName=sFileName+'.orig'
os.rename(sFileName,sOrigFileName)
# open the files
oInFile=file(sOrigFileName,'rb')
oOutFile=codecs.open(sFileName,'wb','utf-16')
# process the file record-by-record
nLine=0
for sLine in unicode_readline(file(sOrigFileName,"rb")):
nLine+=1
# the line is tab-delimited
lLine=sLine[:-2].split(u'\t')
# each couple in the line form a touple - argument and value
if len(lLine) % 2 != 0:
# line has odd number of elements
print "Line %d has an odd number of elements (copied verbatim)"
oOutFile.write(sLine)
continue
# see if the line is worth converting. If not - just copy verbatim
if not lLine[0] in tConvertLines:
oOutFile.write(sLine)
continue
# go over the touples of arg and val, by stepping in twos, and convert
for i in range(2,len(lLine),2):
if lLine[i] in tConvertArgs:
lLine[i+1]=NumConvert(lLine[i+1],bI18N)
# go the extra mile and convert any and all 9-digit numbers
# that begin with a 0 to a +972 prefix if we're doing i18n
if bI18N and \
lLine[i+1][0]==u'0' and \
not lLine[i+1][1] in (u'0',u'1') and \
len(lLine[i+1]) in (9,10):
lLine[i+1]=u'+972'+lLine[i+1][1:]
# print it into the output file
oOutFile.write(u'\t'.join(lLine)+'\r\n')
oOutFile.close()
oInFile.close()
def ProcessExcel(sFileName,bI18N=False):
# copy the file to prevent mishap
import win32file
sOrigFileName=sFileName+'.orig'
os.rename(sFileName,sOrigFileName)
win32file.CopyFile(sOrigFileName,sFileName,True)
import win32com.client
win32com.client.gencache.EnsureDispatch('ADODB.Connection',False)
# Open an ADO connection
oConn=win32com.client.Dispatch('ADODB.Connection')
oConn.Provider="Microsoft.Jet.OLEDB.4.0"
oConn.Properties("Extended Properties").Value="Excel 8.0;HDR=NO"
oConn.Open(sFileName)
# Get the sheet list
oRS=oConn.OpenSchema(win32com.client.constants.adSchemaTables)
oRS.MoveFirst()
lSheets=[]
while not oRS.EOF:
sTableName=oRS.Fields.Item('TABLE_NAME').Value
if sTableName.strip("'")[-1]==u'$':
lSheets.append(sTableName)
else:
print "Skipping",sTableName
oRS.MoveNext()
oRS.Close()
oRS=None
# go over the sheets
for sSheet in lSheets:
oRS = win32com.client.Dispatch('ADODB.Recordset')
oRS.CursorLocation = win32com.client.constants.adUseClient
oRS.Open("SELECT * FROM [%s]" % sSheet,
oConn,
win32com.client.constants.adOpenStatic,
win32com.client.constants.adLockOptimistic)
# go over the records in the sheets
oRS.MoveFirst()
print "\nSheet %s, %d fields\n" % (sSheet,oRS.Fields.Count)
while not oRS.EOF:
# go over a single row
nRowChanges=0
for oField in oRS.Fields:
if oField.Type == 202:
if oField.Value:
sConverted=NumConvert(oField.Value,bI18N)
# write the change to the cell - only if necessary
if sConverted!=oField.Value:
print "%s ==> %s" % (oField.Value,sConverted)
oField.Value=sConverted
nRowChanges+=1
# update the row if necessary
if nRowChanges:
try:
oRS.Update()
except win32com.client.pywintypes.com_error, tDetails:
# if there is a field is not updatable, retract the change and
move on
if tDetails[0] == -2147352567:
oRS.CancelUpdate()
print "*** Update canceled for last %d change(s) due to\n" %
nRowChanges + \
"*** a read-only column in the row (maybe formula?)"
else:
raise win32com.client.pywintypes.com_error, tDetails
oRS.MoveNext()
oRS.Close()
oRS=None
def main():
print "Phone number converter for Excel and Nokia Data Suite v0.99\n"
# parse the command line options
lOptions = [x.split("=",1) for x in os.sys.argv[1:]]
for tOption in lOptions:
if len(tOption)==2:
if tOption[1][0]=='"' and tOption[1][-1]=='"':
tOption[1]=tOption[1][1:-1]
else:
tOption.append(None)
dOptions=dict(lOptions)
print "Convertion Starting...\n"
fTStart=time.time()
if 'num' in dOptions:
print "Converting single number: %s ==> %s" %
(dOptions['num'],NumConvert(dOptions['num'],'i18n' in dOptions))
if 'nokiafile' in dOptions:
ProcessNokiaFile(dOptions['nokiafile'],'i18n' in dOptions)
if 'excel' in dOptions:
ProcessExcel(dOptions['excel'],'i18n' in dOptions)
print "\nOperation took %.3f seconds." % (time.time()-fTStart)
if __name__ == '__main__':
main()
**********************************************************************
This email and attachments have been scanned for
potential proprietary or sensitive information leakage.
Vidius, Inc. Protecting Your Information from the Inside Out.
www.vidius.com
**********************************************************************
================================================================To unsubscribe, send
mail to [EMAIL PROTECTED] with
the word "unsubscribe" in the message body, e.g., run the command
echo unsubscribe | mail [EMAIL PROTECTED]