Compare commits

...

4 Commits
v1.0 ... v1.3

Author SHA1 Message Date
Apprentice Alf
dce51ae232 tools v1.3 2015-03-02 07:35:40 +00:00
Apprentice Alf
2819550411 tools v1.1 2015-03-02 07:32:21 +00:00
Anonymous
f8154c4615 ineptpdf 5 by anon 2015-02-28 14:38:24 +00:00
i♥cabbages
58833e7dc5 Unknown date, late 2009/early 2010 2015-02-28 14:35:29 +00:00
24 changed files with 473 additions and 186 deletions

View File

@@ -62,7 +62,7 @@ class ASN1Parser(object):
def __init__(self, bytes): def __init__(self, bytes):
self.bytes = bytes self.bytes = bytes
self.index = 0 self.index = 0
def get(self, length): def get(self, length):
if self.index + length > len(self.bytes): if self.index + length > len(self.bytes):
raise ASN1Error("Error decoding ASN.1") raise ASN1Error("Error decoding ASN.1")
@@ -72,22 +72,22 @@ class ASN1Parser(object):
x |= self.bytes[self.index] x |= self.bytes[self.index]
self.index += 1 self.index += 1
return x return x
def getFixBytes(self, lengthBytes): def getFixBytes(self, lengthBytes):
bytes = self.bytes[self.index : self.index+lengthBytes] bytes = self.bytes[self.index : self.index+lengthBytes]
self.index += lengthBytes self.index += lengthBytes
return bytes return bytes
def getVarBytes(self, lengthLength): def getVarBytes(self, lengthLength):
lengthBytes = self.get(lengthLength) lengthBytes = self.get(lengthLength)
return self.getFixBytes(lengthBytes) return self.getFixBytes(lengthBytes)
def getFixList(self, length, lengthList): def getFixList(self, length, lengthList):
l = [0] * lengthList l = [0] * lengthList
for x in range(lengthList): for x in range(lengthList):
l[x] = self.get(length) l[x] = self.get(length)
return l return l
def getVarList(self, length, lengthLength): def getVarList(self, length, lengthLength):
lengthList = self.get(lengthLength) lengthList = self.get(lengthLength)
if lengthList % length != 0: if lengthList % length != 0:
@@ -97,19 +97,19 @@ class ASN1Parser(object):
for x in range(lengthList): for x in range(lengthList):
l[x] = self.get(length) l[x] = self.get(length)
return l return l
def startLengthCheck(self, lengthLength): def startLengthCheck(self, lengthLength):
self.lengthCheck = self.get(lengthLength) self.lengthCheck = self.get(lengthLength)
self.indexCheck = self.index self.indexCheck = self.index
def setLengthCheck(self, length): def setLengthCheck(self, length):
self.lengthCheck = length self.lengthCheck = length
self.indexCheck = self.index self.indexCheck = self.index
def stopLengthCheck(self): def stopLengthCheck(self):
if (self.index - self.indexCheck) != self.lengthCheck: if (self.index - self.indexCheck) != self.lengthCheck:
raise ASN1Error("Error decoding ASN.1") raise ASN1Error("Error decoding ASN.1")
def atLengthCheck(self): def atLengthCheck(self):
if (self.index - self.indexCheck) < self.lengthCheck: if (self.index - self.indexCheck) < self.lengthCheck:
return False return False
@@ -162,7 +162,7 @@ class Decryptor(object):
path = elem.get('URI', None) path = elem.get('URI', None)
if path is not None: if path is not None:
encrypted.add(path) encrypted.add(path)
def decompress(self, bytes): def decompress(self, bytes):
dc = zlib.decompressobj(-15) dc = zlib.decompressobj(-15)
bytes = dc.decompress(bytes) bytes = dc.decompress(bytes)
@@ -170,7 +170,7 @@ class Decryptor(object):
if ex: if ex:
bytes = bytes + ex bytes = bytes + ex
return bytes return bytes
def decrypt(self, path, data): def decrypt(self, path, data):
if path in self._encrypted: if path in self._encrypted:
data = self._aes.decrypt(data)[16:] data = self._aes.decrypt(data)[16:]
@@ -336,5 +336,6 @@ def gui_main():
return 0 return 0
if __name__ == '__main__': if __name__ == '__main__':
# sys.exit(cli_main()) if len(sys.argv) > 1:
sys.exit(cli_main())
sys.exit(gui_main()) sys.exit(gui_main())

View File

@@ -73,6 +73,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget # post output from subprocess in scrolled text widget
def showCmdOutput(self, msg): def showCmdOutput(self, msg):
if msg and msg !='': if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg) self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END) self.stext.yview_pickplace(Tkconstants.END)
return return
@@ -88,7 +89,7 @@ class MainDialog(Tkinter.Frame):
cmdline = 'python lib\kindlepid.py "' + serial + '"' cmdline = 'python lib\kindlepid.py "' + serial + '"'
else : else :
cmdline = 'lib\kindlepid.py "' + serial + '"' cmdline = 'lib\kindlepid.py "' + serial + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False) p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2 return p2
@@ -113,6 +114,7 @@ class MainDialog(Tkinter.Frame):
log += 'Serial = "' + serial + '"\n' log += 'Serial = "' + serial + '"\n'
log += '\n\n' log += '\n\n'
log += 'Please Wait ...\n\n' log += 'Please Wait ...\n\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log) self.stext.insert(Tkconstants.END,log)
self.p2 = self.pidrdr(serial) self.p2 = self.pidrdr(serial)

View File

@@ -30,7 +30,9 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='Mobi eBook input file').grid(row=0, sticky=Tkconstants.E) Tkinter.Label(body, text='Mobi eBook input file').grid(row=0, sticky=Tkconstants.E)
self.mobipath = Tkinter.Entry(body, width=50) self.mobipath = Tkinter.Entry(body, width=50)
self.mobipath.grid(row=0, column=1, sticky=sticky) self.mobipath.grid(row=0, column=1, sticky=sticky)
self.mobipath.insert(0, os.getcwd()) cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.mobipath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_mobipath) button = Tkinter.Button(body, text="...", command=self.get_mobipath)
button.grid(row=0, column=2) button.grid(row=0, column=2)
@@ -80,6 +82,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget # post output from subprocess in scrolled text widget
def showCmdOutput(self, msg): def showCmdOutput(self, msg):
if msg and msg !='': if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg) self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END) self.stext.yview_pickplace(Tkconstants.END)
return return
@@ -96,6 +99,7 @@ class MainDialog(Tkinter.Frame):
else : else :
cmdline = 'lib\kindlefix.py "' + infile + '" "' + pidnum + '"' cmdline = 'lib\kindlefix.py "' + infile + '" "' + pidnum + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False) p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2 return p2
@@ -138,6 +142,7 @@ class MainDialog(Tkinter.Frame):
log += 'PID = "' + pidnum + '"\n' log += 'PID = "' + pidnum + '"\n'
log += '\n\n' log += '\n\n'
log += 'Please Wait ...\n\n' log += 'Please Wait ...\n\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log) self.stext.insert(Tkconstants.END,log)
self.p2 = self.krdr(mobipath, pidnum) self.p2 = self.krdr(mobipath, pidnum)

View File

@@ -30,7 +30,9 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='Mobi eBook input file').grid(row=0, sticky=Tkconstants.E) Tkinter.Label(body, text='Mobi eBook input file').grid(row=0, sticky=Tkconstants.E)
self.mobipath = Tkinter.Entry(body, width=50) self.mobipath = Tkinter.Entry(body, width=50)
self.mobipath.grid(row=0, column=1, sticky=sticky) self.mobipath.grid(row=0, column=1, sticky=sticky)
self.mobipath.insert(0, os.getcwd()) cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.mobipath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_mobipath) button = Tkinter.Button(body, text="...", command=self.get_mobipath)
button.grid(row=0, column=2) button.grid(row=0, column=2)
@@ -87,6 +89,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget # post output from subprocess in scrolled text widget
def showCmdOutput(self, msg): def showCmdOutput(self, msg):
if msg and msg !='': if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg) self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END) self.stext.yview_pickplace(Tkconstants.END)
return return
@@ -103,6 +106,7 @@ class MainDialog(Tkinter.Frame):
else : else :
cmdline = 'lib\mobidedrm.py "' + infile + '" "' + outfile + '" "' + pidnum + '"' cmdline = 'lib\mobidedrm.py "' + infile + '" "' + outfile + '" "' + pidnum + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False) p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2 return p2
@@ -167,6 +171,7 @@ class MainDialog(Tkinter.Frame):
log += 'PID = "' + pidnum + '"\n' log += 'PID = "' + pidnum + '"\n'
log += '\n\n' log += '\n\n'
log += 'Please Wait ...\n\n' log += 'Please Wait ...\n\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log) self.stext.insert(Tkconstants.END,log)
self.p2 = self.mobirdr(mobipath, outpath, pidnum) self.p2 = self.mobirdr(mobipath, outpath, pidnum)

View File

@@ -25,15 +25,16 @@
# import filter it works when importing unencrypted files. # import filter it works when importing unencrypted files.
# Also now handles encrypted files that don't need a specific PID. # Also now handles encrypted files that don't need a specific PID.
# 0.11 - use autoflushed stdout and proper return values # 0.11 - use autoflushed stdout and proper return values
# 0.12 - Fix for problems with metadata import as Calibre plugin, report errors
class Unbuffered: class Unbuffered:
def __init__(self, stream): def __init__(self, stream):
self.stream = stream self.stream = stream
def write(self, data): def write(self, data):
self.stream.write(data) self.stream.write(data)
self.stream.flush() self.stream.flush()
def __getattr__(self, attr): def __getattr__(self, attr):
return getattr(self.stream, attr) return getattr(self.stream, attr)
import sys import sys
sys.stdout=Unbuffered(sys.stdout) sys.stdout=Unbuffered(sys.stdout)
@@ -45,37 +46,37 @@ class DrmException(Exception):
#implementation of Pukall Cipher 1 #implementation of Pukall Cipher 1
def PC1(key, src, decryption=True): def PC1(key, src, decryption=True):
sum1 = 0; sum1 = 0;
sum2 = 0; sum2 = 0;
keyXorVal = 0; keyXorVal = 0;
if len(key)!=16: if len(key)!=16:
print "Bad key length!" print "Bad key length!"
return None return None
wkey = [] wkey = []
for i in xrange(8): for i in xrange(8):
wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1])) wkey.append(ord(key[i*2])<<8 | ord(key[i*2+1]))
dst = "" dst = ""
for i in xrange(len(src)): for i in xrange(len(src)):
temp1 = 0; temp1 = 0;
byteXorVal = 0; byteXorVal = 0;
for j in xrange(8): for j in xrange(8):
temp1 ^= wkey[j] temp1 ^= wkey[j]
sum2 = (sum2+j)*20021 + sum1 sum2 = (sum2+j)*20021 + sum1
sum1 = (temp1*346)&0xFFFF sum1 = (temp1*346)&0xFFFF
sum2 = (sum2+sum1)&0xFFFF sum2 = (sum2+sum1)&0xFFFF
temp1 = (temp1*20021+1)&0xFFFF temp1 = (temp1*20021+1)&0xFFFF
byteXorVal ^= temp1 ^ sum2 byteXorVal ^= temp1 ^ sum2
curByte = ord(src[i]) curByte = ord(src[i])
if not decryption: if not decryption:
keyXorVal = curByte * 257; keyXorVal = curByte * 257;
curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF curByte = ((curByte ^ (byteXorVal >> 8)) ^ byteXorVal) & 0xFF
if decryption: if decryption:
keyXorVal = curByte * 257; keyXorVal = curByte * 257;
for j in xrange(8): for j in xrange(8):
wkey[j] ^= keyXorVal; wkey[j] ^= keyXorVal;
dst+=chr(curByte) dst+=chr(curByte)
return dst return dst
def checksumPid(s): def checksumPid(s):
letters = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789" letters = "ABCDEFGHIJKLMNPQRSTUVWXYZ123456789"
@@ -234,40 +235,46 @@ if not __name__ == "__main__":
description = 'Removes DRM from secure Mobi files' description = 'Removes DRM from secure Mobi files'
supported_platforms = ['linux', 'osx', 'windows'] # Platforms this plugin will run on supported_platforms = ['linux', 'osx', 'windows'] # Platforms this plugin will run on
author = 'The Dark Reverser' # The author of this plugin author = 'The Dark Reverser' # The author of this plugin
version = (0, 1, 0) # The version number of this plugin version = (0, 1, 2) # The version number of this plugin
file_types = set(['prc','mobi','azw']) # The file types that this plugin will be applied to file_types = set(['prc','mobi','azw']) # The file types that this plugin will be applied to
on_import = True # Run this plugin during the import on_import = True # Run this plugin during the import
def run(self, path_to_ebook): def run(self, path_to_ebook):
of = self.temporary_file('.mobi') from calibre.gui2 import is_ok_to_use_qt
from PyQt4.Qt import QMessageBox
PID = self.site_customization PID = self.site_customization
data_file = file(path_to_ebook, 'rb').read() data_file = file(path_to_ebook, 'rb').read()
ar = PID.split(',') ar = PID.split(',')
for i in ar: for i in ar:
try: try:
file(of.name, 'wb').write(DrmStripper(data_file, i).getResult()) unlocked_file = DrmStripper(data_file, i).getResult()
except DrmException: except DrmException:
# Hm, we should display an error dialog here. # ignore the error
# Dunno how though. pass
# Ignore the dirty hack behind the curtain.
# strexcept = 'echo exception: %s > /dev/tty' % e
# subprocess.call(strexcept,shell=True)
print i + ": not PID for book"
else: else:
of = self.temporary_file('.mobi')
of.write(unlocked_file)
of.close()
return of.name return of.name
if is_ok_to_use_qt():
d = QMessageBox(QMessageBox.Warning, "MobiDeDRM Plugin", "Couldn't decode: %s\n\nImporting encrypted version." % path_to_ebook)
d.show()
d.raise_()
d.exec_()
return path_to_ebook
def customization_help(self, gui=False): def customization_help(self, gui=False):
return 'Enter PID (separate multiple PIDs with comma)' return 'Enter PID (separate multiple PIDs with comma)'
if __name__ == "__main__": if __name__ == "__main__":
print "MobiDeDrm v0.11. Copyright (c) 2008 The Dark Reverser" print "MobiDeDrm v0.12. Copyright (c) 2008 The Dark Reverser"
if len(sys.argv)<4: if len(sys.argv)<4:
print "Removes protection from Mobipocket books" print "Removes protection from Mobipocket books"
print "Usage:" print "Usage:"
print " mobidedrm infile.mobi outfile.mobi (PID)" print " mobidedrm infile.mobi outfile.mobi (PID)"
sys.exit(1) sys.exit(1)
else: else:
infile = sys.argv[1] infile = sys.argv[1]
outfile = sys.argv[2] outfile = sys.argv[2]
pid = sys.argv[3] pid = sys.argv[3]

View File

@@ -31,14 +31,18 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='Topaz eBook input file').grid(row=0, sticky=Tkconstants.E) Tkinter.Label(body, text='Topaz eBook input file').grid(row=0, sticky=Tkconstants.E)
self.tpzpath = Tkinter.Entry(body, width=50) self.tpzpath = Tkinter.Entry(body, width=50)
self.tpzpath.grid(row=0, column=1, sticky=sticky) self.tpzpath.grid(row=0, column=1, sticky=sticky)
self.tpzpath.insert(0, os.getcwd()) cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.tpzpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_tpzpath) button = Tkinter.Button(body, text="...", command=self.get_tpzpath)
button.grid(row=0, column=2) button.grid(row=0, column=2)
Tkinter.Label(body, text='Output Directory').grid(row=1, sticky=Tkconstants.E) Tkinter.Label(body, text='Output Directory').grid(row=1, sticky=Tkconstants.E)
self.outpath = Tkinter.Entry(body, width=50) self.outpath = Tkinter.Entry(body, width=50)
self.outpath.grid(row=1, column=1, sticky=sticky) self.outpath.grid(row=1, column=1, sticky=sticky)
self.outpath.insert(0, os.getcwd()) cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.outpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_outpath) button = Tkinter.Button(body, text="...", command=self.get_outpath)
button.grid(row=1, column=2) button.grid(row=1, column=2)
@@ -88,6 +92,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget # post output from subprocess in scrolled text widget
def showCmdOutput(self, msg): def showCmdOutput(self, msg):
if msg and msg !='': if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg) self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END) self.stext.yview_pickplace(Tkconstants.END)
return return
@@ -108,6 +113,7 @@ class MainDialog(Tkinter.Frame):
else : else :
cmdline = 'lib\cmbtc_dump.py -v -d ' + pidoption + outoption + '"' + infile + '"' cmdline = 'lib\cmbtc_dump.py -v -d ' + pidoption + outoption + '"' + infile + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False) p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2 return p2
@@ -124,9 +130,11 @@ class MainDialog(Tkinter.Frame):
return return
def get_outpath(self): def get_outpath(self):
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
outpath = tkFileDialog.askdirectory( outpath = tkFileDialog.askdirectory(
parent=None, title='Directory to Extract Files into', parent=None, title='Directory to Extract Files into',
initialdir=os.getcwd(), initialfile=None) initialdir=cwd, initialfile=None)
if outpath: if outpath:
outpath = os.path.normpath(outpath) outpath = os.path.normpath(outpath)
self.outpath.delete(0, Tkconstants.END) self.outpath.delete(0, Tkconstants.END)
@@ -168,6 +176,7 @@ class MainDialog(Tkinter.Frame):
log += 'First 8 chars of PID = "' + pidnum + '"\n' log += 'First 8 chars of PID = "' + pidnum + '"\n'
log += '\n\n' log += '\n\n'
log += 'Please Wait ...\n' log += 'Please Wait ...\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log) self.stext.insert(Tkconstants.END,log)
self.p2 = self.topazrdr(tpzpath, outpath, pidnum) self.p2 = self.topazrdr(tpzpath, outpath, pidnum)

View File

@@ -31,14 +31,18 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='Topaz eBook input file').grid(row=0, sticky=Tkconstants.E) Tkinter.Label(body, text='Topaz eBook input file').grid(row=0, sticky=Tkconstants.E)
self.tpzpath = Tkinter.Entry(body, width=50) self.tpzpath = Tkinter.Entry(body, width=50)
self.tpzpath.grid(row=0, column=1, sticky=sticky) self.tpzpath.grid(row=0, column=1, sticky=sticky)
self.tpzpath.insert(0, os.getcwd()) cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.tpzpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_tpzpath) button = Tkinter.Button(body, text="...", command=self.get_tpzpath)
button.grid(row=0, column=2) button.grid(row=0, column=2)
Tkinter.Label(body, text='Output Directory').grid(row=1, sticky=Tkconstants.E) Tkinter.Label(body, text='Output Directory').grid(row=1, sticky=Tkconstants.E)
self.outpath = Tkinter.Entry(body, width=50) self.outpath = Tkinter.Entry(body, width=50)
self.outpath.grid(row=1, column=1, sticky=sticky) self.outpath.grid(row=1, column=1, sticky=sticky)
self.outpath.insert(0, os.getcwd()) cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.outpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_outpath) button = Tkinter.Button(body, text="...", command=self.get_outpath)
button.grid(row=1, column=2) button.grid(row=1, column=2)
@@ -88,6 +92,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget # post output from subprocess in scrolled text widget
def showCmdOutput(self, msg): def showCmdOutput(self, msg):
if msg and msg !='': if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg) self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END) self.stext.yview_pickplace(Tkconstants.END)
return return
@@ -106,6 +111,7 @@ class MainDialog(Tkinter.Frame):
else : else :
cmdline = 'lib\cmbtc_dump_nonK4PC.py -v -d ' + pidoption + outoption + '"' + infile + '"' cmdline = 'lib\cmbtc_dump_nonK4PC.py -v -d ' + pidoption + outoption + '"' + infile + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False) p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2 return p2
@@ -122,9 +128,11 @@ class MainDialog(Tkinter.Frame):
return return
def get_outpath(self): def get_outpath(self):
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
outpath = tkFileDialog.askdirectory( outpath = tkFileDialog.askdirectory(
parent=None, title='Directory to Extract Files into', parent=None, title='Directory to Extract Files into',
initialdir=os.getcwd(), initialfile=None) initialdir=cwd, initialfile=None)
if outpath: if outpath:
outpath = os.path.normpath(outpath) outpath = os.path.normpath(outpath)
self.outpath.delete(0, Tkconstants.END) self.outpath.delete(0, Tkconstants.END)
@@ -166,6 +174,7 @@ class MainDialog(Tkinter.Frame):
log += 'First 8 chars of PID = "' + pidnum + '"\n' log += 'First 8 chars of PID = "' + pidnum + '"\n'
log += '\n\n' log += '\n\n'
log += 'Please Wait ...\n' log += 'Please Wait ...\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log) self.stext.insert(Tkconstants.END,log)
self.p2 = self.topazrdr(tpzpath, outpath, pidnum) self.p2 = self.topazrdr(tpzpath, outpath, pidnum)

View File

@@ -31,7 +31,9 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E) Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E)
self.bookdir = Tkinter.Entry(body, width=50) self.bookdir = Tkinter.Entry(body, width=50)
self.bookdir.grid(row=0, column=1, sticky=sticky) self.bookdir.grid(row=0, column=1, sticky=sticky)
self.bookdir.insert(0, os.getcwd()) cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.bookdir.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_bookdir) button = Tkinter.Button(body, text="...", command=self.get_bookdir)
button.grid(row=0, column=2) button.grid(row=0, column=2)
@@ -76,6 +78,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget # post output from subprocess in scrolled text widget
def showCmdOutput(self, msg): def showCmdOutput(self, msg):
if msg and msg !='': if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg) self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END) self.stext.yview_pickplace(Tkconstants.END)
return return
@@ -92,14 +95,17 @@ class MainDialog(Tkinter.Frame):
else : else :
cmdline = 'lib\genhtml.py "' + bookdir + '"' cmdline = 'lib\genhtml.py "' + bookdir + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False) p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2 return p2
def get_bookdir(self): def get_bookdir(self):
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
bookdir = tkFileDialog.askdirectory( bookdir = tkFileDialog.askdirectory(
parent=None, title='Select the Directory you Extracted Topaz Files into', parent=None, title='Select the Directory you Extracted Topaz Files into',
initialdir=os.getcwd(), initialfile=None) initialdir=cwd, initialfile=None)
if bookdir: if bookdir:
bookdir = os.path.normpath(bookdir) bookdir = os.path.normpath(bookdir)
self.bookdir.delete(0, Tkconstants.END) self.bookdir.delete(0, Tkconstants.END)
@@ -127,6 +133,7 @@ class MainDialog(Tkinter.Frame):
log += 'Book Directory = "' + bookdir + '"\n' log += 'Book Directory = "' + bookdir + '"\n'
log += '\n\n' log += '\n\n'
log += 'Please Wait ...\n' log += 'Please Wait ...\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log) self.stext.insert(Tkconstants.END,log)
self.p2 = self.topazrdr(bookdir) self.p2 = self.topazrdr(bookdir)

View File

@@ -31,7 +31,9 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E) Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E)
self.bookdir = Tkinter.Entry(body, width=50) self.bookdir = Tkinter.Entry(body, width=50)
self.bookdir.grid(row=0, column=1, sticky=sticky) self.bookdir.grid(row=0, column=1, sticky=sticky)
self.bookdir.insert(0, os.getcwd()) cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.bookdir.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_bookdir) button = Tkinter.Button(body, text="...", command=self.get_bookdir)
button.grid(row=0, column=2) button.grid(row=0, column=2)
@@ -76,6 +78,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget # post output from subprocess in scrolled text widget
def showCmdOutput(self, msg): def showCmdOutput(self, msg):
if msg and msg !='': if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg) self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END) self.stext.yview_pickplace(Tkconstants.END)
return return
@@ -92,14 +95,17 @@ class MainDialog(Tkinter.Frame):
else : else :
cmdline = 'lib\gensvg.py "' + bookdir + '"' cmdline = 'lib\gensvg.py "' + bookdir + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False) p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2 return p2
def get_bookdir(self): def get_bookdir(self):
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
bookdir = tkFileDialog.askdirectory( bookdir = tkFileDialog.askdirectory(
parent=None, title='Select the Directory you Extracted Topaz Files into', parent=None, title='Select the Directory you Extracted Topaz Files into',
initialdir=os.getcwd(), initialfile=None) initialdir=cwd, initialfile=None)
if bookdir: if bookdir:
bookdir = os.path.normpath(bookdir) bookdir = os.path.normpath(bookdir)
self.bookdir.delete(0, Tkconstants.END) self.bookdir.delete(0, Tkconstants.END)
@@ -127,6 +133,7 @@ class MainDialog(Tkinter.Frame):
log += 'Book Directory = "' + bookdir + '"\n' log += 'Book Directory = "' + bookdir + '"\n'
log += '\n\n' log += '\n\n'
log += 'Please Wait ...\n' log += 'Please Wait ...\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log) self.stext.insert(Tkconstants.END,log)
self.p2 = self.topazrdr(bookdir) self.p2 = self.topazrdr(bookdir)

View File

@@ -31,7 +31,9 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E) Tkinter.Label(body, text='Directory you Extracted Topaz Files into').grid(row=0, sticky=Tkconstants.E)
self.bookdir = Tkinter.Entry(body, width=50) self.bookdir = Tkinter.Entry(body, width=50)
self.bookdir.grid(row=0, column=1, sticky=sticky) self.bookdir.grid(row=0, column=1, sticky=sticky)
self.bookdir.insert(0, os.getcwd()) cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.bookdir.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_bookdir) button = Tkinter.Button(body, text="...", command=self.get_bookdir)
button.grid(row=0, column=2) button.grid(row=0, column=2)
@@ -76,6 +78,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget # post output from subprocess in scrolled text widget
def showCmdOutput(self, msg): def showCmdOutput(self, msg):
if msg and msg !='': if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg) self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END) self.stext.yview_pickplace(Tkconstants.END)
return return
@@ -92,14 +95,17 @@ class MainDialog(Tkinter.Frame):
else : else :
cmdline = 'lib\genxml.py "' + bookdir + '"' cmdline = 'lib\genxml.py "' + bookdir + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False) p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2 return p2
def get_bookdir(self): def get_bookdir(self):
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
bookdir = tkFileDialog.askdirectory( bookdir = tkFileDialog.askdirectory(
parent=None, title='Select the Directory you Extracted Topaz Files into', parent=None, title='Select the Directory you Extracted Topaz Files into',
initialdir=os.getcwd(), initialfile=None) initialdir=cwd, initialfile=None)
if bookdir: if bookdir:
bookdir = os.path.normpath(bookdir) bookdir = os.path.normpath(bookdir)
self.bookdir.delete(0, Tkconstants.END) self.bookdir.delete(0, Tkconstants.END)
@@ -127,6 +133,7 @@ class MainDialog(Tkinter.Frame):
log += 'Book Directory = "' + bookdir + '"\n' log += 'Book Directory = "' + bookdir + '"\n'
log += '\n\n' log += '\n\n'
log += 'Please Wait ...\n' log += 'Please Wait ...\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log) self.stext.insert(Tkconstants.END,log)
self.p2 = self.topazrdr(bookdir) self.p2 = self.topazrdr(bookdir)

View File

@@ -1,5 +1,5 @@
#! /usr/bin/python #! /usr/bin/python
# For use in Topaz Scripts version 2.2 # For use in Topaz Scripts version 2.3
""" """

View File

@@ -1,5 +1,5 @@
#!/usr/bin/python #!/usr/bin/python
# For use with Topaz Scripts Version 2.2 # For use with Topaz Scripts Version 2.3
class Unbuffered: class Unbuffered:
def __init__(self, stream): def __init__(self, stream):

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python #! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2 # For use with Topaz Scripts Version 2.3
class Unbuffered: class Unbuffered:
def __init__(self, stream): def __init__(self, stream):

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python #! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2 # For use with Topaz Scripts Version 2.3
import csv import csv
import sys import sys

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python #! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2 # For use with Topaz Scripts Version 2.3
import sys import sys
import csv import csv
@@ -346,35 +346,40 @@ class DocParser(object):
if end == -1 : if end == -1 :
end = self.docSize end = self.docSize
# seems some xml has last* coming before first* so we have to
# handle any order
sp_first = -1
sp_last = -1
gl_first = -1
gl_last = -1
ws_first = -1
ws_last = -1
word_class = ''
while (line < end) : while (line < end) :
(name, argres) = self.lineinDoc(line) (name, argres) = self.lineinDoc(line)
# handle both span and _span
if name.endswith('span.firstWord') : if name.endswith('span.firstWord') :
first = int(argres) sp_first = int(argres)
(name, argres) = self.lineinDoc(line+1)
if not name.endswith('span.lastWord'): elif name.endswith('span.lastWord') :
print 'Error: - incorrect _span ordering inside paragraph' sp_last = int(argres)
last = int(argres)
for wordnum in xrange(first, last):
result.append(('ocr', wordnum))
line += 1
elif name.endswith('word.firstGlyph') : elif name.endswith('word.firstGlyph') :
first = int(argres) gl_first = int(argres)
(name, argres) = self.lineinDoc(line+1)
if not name.endswith('word.lastGlyph'): elif name.endswith('word.lastGlyph') :
print 'Error: - incorrect glyph ordering inside word in paragraph' gl_last = int(argres)
last = int(argres)
glyphList = [] elif name.endswith('word_semantic.firstWord'):
for glyphnum in xrange(first, last): ws_first = int(argres)
glyphList.append(glyphnum)
num = self.svgcount elif name.endswith('word_semantic.lastWord'):
self.glyphs_to_image(glyphList) ws_last = int(argres)
self.svgcount += 1
result.append(('svg', num))
line += 1
elif name.endswith('word.class'): elif name.endswith('word.class'):
(cname, space) = argres.split('-',1) (cname, space) = argres.split('-',1)
@@ -386,15 +391,28 @@ class DocParser(object):
result.append(('img' + word_class, int(argres))) result.append(('img' + word_class, int(argres)))
word_class = '' word_class = ''
elif name.endswith('word_semantic.firstWord'): if (sp_first != -1) and (sp_last != -1):
first = int(argres) for wordnum in xrange(sp_first, sp_last):
(name, argres) = self.lineinDoc(line+1)
if not name.endswith('word_semantic.lastWord'):
print 'Error: - incorrect word_semantic ordering inside paragraph'
last = int(argres)
for wordnum in xrange(first, last):
result.append(('ocr', wordnum)) result.append(('ocr', wordnum))
line += 1 sp_first = -1
sp_last = -1
if (gl_first != -1) and (gl_last != -1):
glyphList = []
for glyphnum in xrange(gl_first, gl_last):
glyphList.append(glyphnum)
num = self.svgcount
self.glyphs_to_image(glyphList)
self.svgcount += 1
result.append(('svg', num))
gl_first = -1
gl_last = -1
if (ws_first != -1) and (ws_last != -1):
for wordnum in xrange(ws_first, ws_last):
result.append(('ocr', wordnum))
ws_first = -1
ws_last = -1
line += 1 line += 1

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python #! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2 # For use with Topaz Scripts Version 2.3
class Unbuffered: class Unbuffered:
def __init__(self, stream): def __init__(self, stream):

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python #! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2 # For use with Topaz Scripts Version 2.3
class Unbuffered: class Unbuffered:
def __init__(self, stream): def __init__(self, stream):

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python #! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2 # For use with Topaz Scripts Version 2.3
class Unbuffered: class Unbuffered:
def __init__(self, stream): def __init__(self, stream):

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python #! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2 # For use with Topaz Scripts Version 2.3
import csv import csv
import sys import sys

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python #! /usr/bin/python
# vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab # vim:ts=4:sw=4:softtabstop=4:smarttab:expandtab
# For use with Topaz Scripts Version 2.2 # For use with Topaz Scripts Version 2.3
import csv import csv
import sys import sys

View File

@@ -1,3 +1,7 @@
Changes in 2.3
- fix for use with non-latin1 based systems (thank you Tedd)
- fixes for out of order tokens in xml
Changes in 2.2 Changes in 2.2
- fix for minor bug in encode_Number from clark nova - fix for minor bug in encode_Number from clark nova
- more fixes to handle paths with spaces in them - more fixes to handle paths with spaces in them

View File

@@ -30,7 +30,9 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='eBook Pml input file').grid(row=0, sticky=Tkconstants.E) Tkinter.Label(body, text='eBook Pml input file').grid(row=0, sticky=Tkconstants.E)
self.pmlpath = Tkinter.Entry(body, width=50) self.pmlpath = Tkinter.Entry(body, width=50)
self.pmlpath.grid(row=0, column=1, sticky=sticky) self.pmlpath.grid(row=0, column=1, sticky=sticky)
self.pmlpath.insert(0, os.getcwd()) cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.pmlpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_pmlpath) button = Tkinter.Button(body, text="...", command=self.get_pmlpath)
button.grid(row=0, column=2) button.grid(row=0, column=2)
@@ -82,6 +84,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget # post output from subprocess in scrolled text widget
def showCmdOutput(self, msg): def showCmdOutput(self, msg):
if msg and msg !='': if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg) self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END) self.stext.yview_pickplace(Tkconstants.END)
return return
@@ -98,6 +101,7 @@ class MainDialog(Tkinter.Frame):
else : else :
cmdline = 'lib\\xpml2xhtml.py "' + infile + '" "' + outfile + '"' cmdline = 'lib\\xpml2xhtml.py "' + infile + '" "' + outfile + '"'
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False) p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2 return p2
@@ -156,6 +160,7 @@ class MainDialog(Tkinter.Frame):
log += 'HTML Output File = "' + outpath + '"\n' log += 'HTML Output File = "' + outpath + '"\n'
log += '\n\n' log += '\n\n'
log += 'Please Wait ...\n\n' log += 'Please Wait ...\n\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log) self.stext.insert(Tkconstants.END,log)
self.p2 = self.pmlhtml(pmlpath, outpath) self.p2 = self.pmlhtml(pmlpath, outpath)

View File

@@ -31,14 +31,18 @@ class MainDialog(Tkinter.Frame):
Tkinter.Label(body, text='eBook PDB input file').grid(row=0, sticky=Tkconstants.E) Tkinter.Label(body, text='eBook PDB input file').grid(row=0, sticky=Tkconstants.E)
self.pdbpath = Tkinter.Entry(body, width=50) self.pdbpath = Tkinter.Entry(body, width=50)
self.pdbpath.grid(row=0, column=1, sticky=sticky) self.pdbpath.grid(row=0, column=1, sticky=sticky)
self.pdbpath.insert(0, os.getcwd()) cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.pdbpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_pdbpath) button = Tkinter.Button(body, text="...", command=self.get_pdbpath)
button.grid(row=0, column=2) button.grid(row=0, column=2)
Tkinter.Label(body, text='Output Directory').grid(row=1, sticky=Tkconstants.E) Tkinter.Label(body, text='Output Directory').grid(row=1, sticky=Tkconstants.E)
self.outpath = Tkinter.Entry(body, width=50) self.outpath = Tkinter.Entry(body, width=50)
self.outpath.grid(row=1, column=1, sticky=sticky) self.outpath.grid(row=1, column=1, sticky=sticky)
self.outpath.insert(0, os.getcwd()) cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
self.outpath.insert(0, cwd)
button = Tkinter.Button(body, text="...", command=self.get_outpath) button = Tkinter.Button(body, text="...", command=self.get_outpath)
button.grid(row=1, column=2) button.grid(row=1, column=2)
@@ -93,6 +97,7 @@ class MainDialog(Tkinter.Frame):
# post output from subprocess in scrolled text widget # post output from subprocess in scrolled text widget
def showCmdOutput(self, msg): def showCmdOutput(self, msg):
if msg and msg !='': if msg and msg !='':
msg = msg.encode('utf-8')
self.stext.insert(Tkconstants.END,msg) self.stext.insert(Tkconstants.END,msg)
self.stext.yview_pickplace(Tkconstants.END) self.stext.yview_pickplace(Tkconstants.END)
return return
@@ -109,6 +114,7 @@ class MainDialog(Tkinter.Frame):
else : else :
cmdline = 'lib\erdr2pml.py "' + infile + '" "' + outdir + '" "' + name + '" ' + ccnum cmdline = 'lib\erdr2pml.py "' + infile + '" "' + outdir + '" "' + name + '" ' + ccnum
cmdline = cmdline.encode(sys.getfilesystemencoding())
p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False) p2 = Process(cmdline, shell=True, bufsize=1, stdin=None, stdout=PIPE, stderr=PIPE, close_fds=False)
return p2 return p2
@@ -125,9 +131,11 @@ class MainDialog(Tkinter.Frame):
return return
def get_outpath(self): def get_outpath(self):
cwd = os.getcwdu()
cwd = cwd.encode('utf-8')
outpath = tkFileDialog.askdirectory( outpath = tkFileDialog.askdirectory(
parent=None, title='Directory to Store Output into', parent=None, title='Directory to Store Output into',
initialdir=os.getcwd(), initialfile=None) initialdir=cwd, initialfile=None)
if outpath: if outpath:
outpath = os.path.normpath(outpath) outpath = os.path.normpath(outpath)
self.outpath.delete(0, Tkconstants.END) self.outpath.delete(0, Tkconstants.END)
@@ -175,6 +183,7 @@ class MainDialog(Tkinter.Frame):
log += 'Last 8 of CC = "' + ccnum + '"\n' log += 'Last 8 of CC = "' + ccnum + '"\n'
log += '\n\n' log += '\n\n'
log += 'Please Wait ...\n' log += 'Please Wait ...\n'
log = log.encode('utf-8')
self.stext.insert(Tkconstants.END,log) self.stext.insert(Tkconstants.END,log)
self.p2 = self.erdr(pdbpath, outpath, name, ccnum) self.p2 = self.erdr(pdbpath, outpath, name, ccnum)

View File

@@ -1,6 +1,6 @@
#! /usr/bin/python #! /usr/bin/python
# ineptpdf.pyw, version 2 # ineptpdf5.pyw, version 5
# To run this program install Python 2.6 from http://www.python.org/download/ # To run this program install Python 2.6 from http://www.python.org/download/
# and PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto # and PyCrypto from http://www.voidspace.org.uk/python/modules.shtml#pycrypto
@@ -10,6 +10,9 @@
# Revision history: # Revision history:
# 1 - Initial release # 1 - Initial release
# 2 - Improved determination of key-generation algorithm # 2 - Improved determination of key-generation algorithm
# 3 - Correctly handle PDF >=1.5 cross-reference streams
# 4 - Removal of ciando's personal ID (anon)
# 5 - removing small bug with V3 ebooks (anon)
""" """
Decrypt Adobe ADEPT-encrypted PDF files. Decrypt Adobe ADEPT-encrypted PDF files.
@@ -25,7 +28,7 @@ import re
import zlib import zlib
import struct import struct
import hashlib import hashlib
from itertools import chain from itertools import chain, islice
import xml.etree.ElementTree as etree import xml.etree.ElementTree as etree
import Tkinter import Tkinter
import Tkconstants import Tkconstants
@@ -163,16 +166,16 @@ def nunpack(s, default=0):
elif l == 1: elif l == 1:
return ord(s) return ord(s)
elif l == 2: elif l == 2:
return unpack('>H', s)[0] return struct.unpack('>H', s)[0]
elif l == 3: elif l == 3:
return unpack('>L', '\x00'+s)[0] return struct.unpack('>L', '\x00'+s)[0]
elif l == 4: elif l == 4:
return unpack('>L', s)[0] return struct.unpack('>L', s)[0]
else: else:
return TypeError('invalid length: %d' % l) return TypeError('invalid length: %d' % l)
STRICT = 0 STRICT = 1
## PS Exceptions ## PS Exceptions
@@ -680,6 +683,12 @@ class PSStackParser(PSBaseParser):
return obj return obj
LITERAL_CRYPT = PSLiteralTable.intern('Crypt')
LITERALS_FLATE_DECODE = (PSLiteralTable.intern('FlateDecode'), PSLiteralTable.intern('Fl'))
LITERALS_LZW_DECODE = (PSLiteralTable.intern('LZWDecode'), PSLiteralTable.intern('LZW'))
LITERALS_ASCII85_DECODE = (PSLiteralTable.intern('ASCII85Decode'), PSLiteralTable.intern('A85'))
## PDF Objects ## PDF Objects
## ##
class PDFObject(PSObject): pass class PDFObject(PSObject): pass
@@ -741,11 +750,11 @@ def decipher_all(decipher, objid, genno, x):
''' '''
if isinstance(x, str): if isinstance(x, str):
return decipher(objid, genno, x) return decipher(objid, genno, x)
decf = lambda v: decipher_all(decipher, objid, genno, v)
if isinstance(x, list): if isinstance(x, list):
x = [ decipher_all(decipher, objid, genno, v) for v in x ] x = [decf(v) for v in x]
elif isinstance(x, dict): elif isinstance(x, dict):
for (k,v) in x.iteritems(): x = dict((k, decf(v)) for (k, v) in x.iteritems())
x[k] = decipher_all(decipher, objid, genno, v)
return x return x
# Type checking
@@ -805,6 +814,28 @@ def stream_value(x):
return PDFStream({}, '') return PDFStream({}, '')
return x return x
# ascii85decode(data)
def ascii85decode(data):
n = b = 0
out = ''
for c in data:
if '!' <= c and c <= 'u':
n += 1
b = b*85+(ord(c)-33)
if n == 5:
out += struct.pack('>L',b)
n = b = 0
elif c == 'z':
assert n == 0
out += '\0\0\0\0'
elif c == '~':
if n:
for _ in range(5-n):
b = b*85+84
out += struct.pack('>L',b)[:n-1]
break
return out
## PDFStream type ## PDFStream type
## ##
@@ -834,12 +865,76 @@ class PDFStream(PDFObject):
return '<PDFStream(%r): raw=%d, %r>' % \ return '<PDFStream(%r): raw=%d, %r>' % \
(self.objid, len(self.rawdata), self.dic) (self.objid, len(self.rawdata), self.dic)
def decode(self):
assert self.data == None and self.rawdata != None
data = self.rawdata
if self.decipher:
# Handle encryption
data = self.decipher(self.objid, self.genno, data)
if 'Filter' not in self.dic:
self.data = data
self.rawdata = None
return
filters = self.dic['Filter']
if not isinstance(filters, list):
filters = [ filters ]
for f in filters:
if f in LITERALS_FLATE_DECODE:
# will get errors if the document is encrypted.
data = zlib.decompress(data)
elif f in LITERALS_LZW_DECODE:
try:
from cStringIO import StringIO
except ImportError:
from StringIO import StringIO
data = ''.join(LZWDecoder(StringIO(data)).run())
elif f in LITERALS_ASCII85_DECODE:
data = ascii85decode(data)
elif f == LITERAL_CRYPT:
raise PDFNotImplementedError('/Crypt filter is unsupported')
else:
raise PDFNotImplementedError('Unsupported filter: %r' % f)
# apply predictors
if 'DP' in self.dic:
params = self.dic['DP']
else:
params = self.dic.get('DecodeParms', {})
if 'Predictor' in params:
pred = int_value(params['Predictor'])
if pred:
if pred != 12:
raise PDFNotImplementedError(
'Unsupported predictor: %r' % pred)
if 'Columns' not in params:
raise PDFValueError(
'Columns undefined for predictor=12')
columns = int_value(params['Columns'])
buf = ''
ent0 = '\x00' * columns
for i in xrange(0, len(data), columns+1):
pred = data[i]
ent1 = data[i+1:i+1+columns]
if pred == '\x02':
ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \
for (a,b) in zip(ent0,ent1))
buf += ent1
ent0 = ent1
data = buf
self.data = data
self.rawdata = None
return
def get_data(self):
if self.data == None:
self.decode()
return self.data
def get_rawdata(self): def get_rawdata(self):
return self.rawdata return self.rawdata
def get_decdata(self): def get_decdata(self):
data = self.rawdata data = self.rawdata
if self.decipher: if self.decipher and data:
# Handle encryption # Handle encryption
data = self.decipher(self.objid, self.genno, data) data = self.decipher(self.objid, self.genno, data)
return data return data
@@ -932,6 +1027,66 @@ class PDFXRef(object):
return (None, pos) return (None, pos)
## PDFXRefStream
##
class PDFXRefStream(object):
def __init__(self):
self.index = None
self.data = None
self.entlen = None
self.fl1 = self.fl2 = self.fl3 = None
return
def __repr__(self):
return '<PDFXRef: objid=%d-%d>' % (self.objid_first, self.objid_last)
def objids(self):
for first, size in self.index:
for objid in xrange(first, first + size):
yield objid
def load(self, parser, debug=0):
(_,objid) = parser.nexttoken() # ignored
(_,genno) = parser.nexttoken() # ignored
(_,kwd) = parser.nexttoken()
(_,stream) = parser.nextobject()
if not isinstance(stream, PDFStream) or \
stream.dic['Type'] is not LITERAL_XREF:
raise PDFNoValidXRef('Invalid PDF stream spec.')
size = stream.dic['Size']
index = stream.dic.get('Index', (0,size))
self.index = zip(islice(index, 0, None, 2),
islice(index, 1, None, 2))
(self.fl1, self.fl2, self.fl3) = stream.dic['W']
self.data = stream.get_data()
self.entlen = self.fl1+self.fl2+self.fl3
self.trailer = stream.dic
return
def getpos(self, objid):
offset = 0
for first, size in self.index:
if first <= objid and objid < (first + size):
break
offset += size
else:
raise KeyError(objid)
i = self.entlen * ((objid - first) + offset)
ent = self.data[i:i+self.entlen]
f1 = nunpack(ent[:self.fl1], 1)
if f1 == 1:
pos = nunpack(ent[self.fl1:self.fl1+self.fl2])
genno = nunpack(ent[self.fl1+self.fl2:])
return (None, pos)
elif f1 == 2:
objid = nunpack(ent[self.fl1:self.fl1+self.fl2])
index = nunpack(ent[self.fl1+self.fl2:])
return (objid, index)
# this is a free object
raise KeyError(objid)
## PDFDocument ## PDFDocument
## ##
## A PDFDocument object represents a PDF document. ## A PDFDocument object represents a PDF document.
@@ -1020,7 +1175,7 @@ class PDFDocument(object):
key = ASN1Parser([ord(x) for x in keyder]) key = ASN1Parser([ord(x) for x in keyder])
key = [bytesToNumber(key.getChild(x).value) for x in xrange(1, 4)] key = [bytesToNumber(key.getChild(x).value) for x in xrange(1, 4)]
rsa = RSA.construct(key) rsa = RSA.construct(key)
length = int_value(param.get('Length')) / 8 length = int_value(param.get('Length', 0)) / 8
rights = str_value(param.get('ADEPT_LICENSE')).decode('base64') rights = str_value(param.get('ADEPT_LICENSE')).decode('base64')
rights = zlib.decompress(rights, -15) rights = zlib.decompress(rights, -15)
rights = etree.fromstring(rights) rights = etree.fromstring(rights)
@@ -1031,11 +1186,16 @@ class PDFDocument(object):
raise ADEPTError('error decrypting book session key') raise ADEPTError('error decrypting book session key')
index = bookkey.index('\0') + 1 index = bookkey.index('\0') + 1
bookkey = bookkey[index:] bookkey = bookkey[index:]
V = 2 ebx_V = int_value(param.get('V', 4))
if (length and len(bookkey) == (length + 1)) or \ ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6))
(not length and len(bookkey) & 1 == 1): # added because of the booktype / decryption book session key error
if ebx_V == 3:
V = 3
elif ebx_V < 4 or ebx_type < 6:
V = ord(bookkey[0]) V = ord(bookkey[0])
bookkey = bookkey[1:] bookkey = bookkey[1:]
else:
V = 2
if length and len(bookkey) != length: if length and len(bookkey) != length:
raise ADEPTError('error decrypting book session key') raise ADEPTError('error decrypting book session key')
self.decrypt_key = bookkey self.decrypt_key = bookkey
@@ -1131,46 +1291,17 @@ class PDFDocument(object):
else: else:
for xref in self.xrefs: for xref in self.xrefs:
try: try:
(strmid, index) = xref.getpos(objid) (stmid, index) = xref.getpos(objid)
break break
except KeyError: except KeyError:
pass pass
else: else:
if STRICT: return
raise PDFSyntaxError('Cannot locate objid=%r' % objid) #if STRICT:
# raise PDFSyntaxError('Cannot locate objid=%r' % objid)
return None return None
if strmid: if stmid:
stream = stream_value(self.getobj(strmid)) return PDFObjStmRef(objid, stmid, index)
if stream.dic.get('Type') is not LITERAL_OBJSTM:
if STRICT:
raise PDFSyntaxError('Not a stream object: %r' % stream)
try:
n = stream.dic['N']
except KeyError:
if STRICT:
raise PDFSyntaxError('N is not defined: %r' % stream)
n = 0
if strmid in self.parsed_objs:
objs = self.parsed_objs[strmid]
else:
parser = PDFObjStrmParser(self, stream.get_data())
objs = []
try:
while 1:
(_,obj) = parser.nextobject()
objs.append(obj)
except PSEOF:
pass
self.parsed_objs[strmid] = objs
genno = 0
i = n*2+index
try:
obj = objs[i]
except IndexError:
raise PDFSyntaxError(
'Invalid object number: objid=%r' % (objid))
if isinstance(obj, PDFStream):
obj.set_objid(objid, 0)
else: else:
self.parser.seek(index) self.parser.seek(index)
(_,objid1) = self.parser.nexttoken() # objid (_,objid1) = self.parser.nexttoken() # objid
@@ -1184,11 +1315,17 @@ class PDFDocument(object):
if isinstance(obj, PDFStream): if isinstance(obj, PDFStream):
obj.set_objid(objid, genno) obj.set_objid(objid, genno)
self.objs[objid] = obj self.objs[objid] = obj
if self.decipher: if self.decipher:
obj = decipher_all(self.decipher, objid, genno, obj) obj = decipher_all(self.decipher, objid, genno, obj)
return obj return obj
class PDFObjStmRef(object):
def __init__(self, objid, stmid, index):
self.objid = objid
self.stmid = stmid
self.index = index
## PDFParser ## PDFParser
## ##
class PDFParser(PSStackParser): class PDFParser(PSStackParser):
@@ -1290,14 +1427,24 @@ class PDFParser(PSStackParser):
(pos, token) = self.nexttoken() (pos, token) = self.nexttoken()
except PSEOF: except PSEOF:
raise PDFNoValidXRef('Unexpected EOF') raise PDFNoValidXRef('Unexpected EOF')
if token is not self.KEYWORD_XREF: if isinstance(token, int):
raise PDFNoValidXRef('xref not found: pos=%d, token=%r' % # XRefStream: PDF-1.5
(pos, token)) self.seek(pos)
self.nextline() self.reset()
xref = PDFXRef() xref = PDFXRefStream()
xref.load(self) xref.load(self)
else:
if token is not self.KEYWORD_XREF:
raise PDFNoValidXRef('xref not found: pos=%d, token=%r' %
(pos, token))
self.nextline()
xref = PDFXRef()
xref.load(self)
xrefs.append(xref) xrefs.append(xref)
trailer = xref.trailer trailer = xref.trailer
if 'XRefStm' in trailer:
pos = int_value(trailer['XRefStm'])
self.read_xref_from(pos, xrefs)
if 'Prev' in trailer: if 'Prev' in trailer:
# find previous xref # find previous xref
pos = int_value(trailer['Prev']) pos = int_value(trailer['Prev'])
@@ -1345,10 +1492,13 @@ class PDFSerializer(object):
parser = PDFParser(doc, inf) parser = PDFParser(doc, inf)
doc.initialize(keypath) doc.initialize(keypath)
self.objids = objids = set() self.objids = objids = set()
for xref in doc.xrefs: for xref in reversed(doc.xrefs):
trailer = xref.trailer trailer = xref.trailer
for objid in xref.objids(): for objid in xref.objids():
objids.add(objid) objids.add(objid)
trailer = dict(trailer)
trailer.pop('Prev', None)
trailer.pop('XRefStm', None)
if 'Encrypt' in trailer: if 'Encrypt' in trailer:
objids.remove(trailer.pop('Encrypt').objid) objids.remove(trailer.pop('Encrypt').objid)
self.trailer = trailer self.trailer = trailer
@@ -1360,26 +1510,64 @@ class PDFSerializer(object):
doc = self.doc doc = self.doc
objids = self.objids objids = self.objids
xrefs = {} xrefs = {}
xrefstm = {}
maxobj = max(objids) maxobj = max(objids)
trailer = dict(self.trailer)
trailer['Size'] = maxobj + 1
for objid in objids: for objid in objids:
obj = doc.getobj(objid)
if isinstance(obj, PDFObjStmRef):
xrefstm[objid] = obj
continue
xrefs[objid] = self.tell() xrefs[objid] = self.tell()
self.serialize_indirect(objid, doc.getobj(objid)) self.serialize_indirect(objid, obj)
startxref = self.tell() startxref = self.tell()
self.write('xref\n') self.write('xref\n')
self.write('0 %d\n' % (maxobj + 1,)) self.write('0 %d\n' % (maxobj + 1,))
for objid in xrange(0, maxobj + 1): for objid in xrange(0, maxobj + 1):
if objid in objids: if objid in xrefs:
self.write("%010d %05d n \n" % (xrefs[objid], 0)) self.write("%010d %05d n \n" % (xrefs[objid], 0))
else: else:
self.write("%010d %05d f \n" % (0, 65535)) self.write("%010d %05d f \n" % (0, 65535))
self.write('trailer\n') self.write('trailer\n')
self.serialize_object(self.trailer) self.serialize_object(trailer)
self.write('\nstartxref\n%d\n%%%%EOF' % startxref) self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
if not xrefstm:
def write(self, *data): return
for datum in data: index = []
self.outf.write(datum) first = None
self.last = data[-1][-1:] prev = None
data = []
for objid in sorted(xrefstm):
if first is None:
first = objid
elif objid != prev + 1:
index.extend((first, prev - first + 1))
first = objid
prev = objid
stmid = xrefstm[objid].stmid
data.append(struct.pack('>BHB', 2, stmid, 0))
index.extend((first, prev - first + 1))
data = zlib.compress(''.join(data))
dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index,
'W': [1, 2, 1], 'Length': len(data), 'Prev': startxref,
'Filter': LITERALS_FLATE_DECODE[0],}
obj = PDFStream(dic, data)
self.write('\n')
trailer['XRefStm'] = startxrefstm = self.tell()
self.serialize_indirect(maxobj + 1, obj)
trailer['Prev'] = startxref
startxref = self.tell()
self.write('xref\n')
self.write('%d 1\n' % (maxobj + 1,))
self.write("%010d %05d n \n" % (startxrefstm, 0))
self.write('trailer\n')
self.serialize_object(trailer)
self.write('\nstartxref\n%d\n%%%%EOF' % startxref)
def write(self, data):
self.outf.write(data)
self.last = data[-1:]
def tell(self): def tell(self):
return self.outf.tell() return self.outf.tell()
@@ -1389,6 +1577,9 @@ class PDFSerializer(object):
string = string.replace('\n', r'\n') string = string.replace('\n', r'\n')
string = string.replace('(', r'\(') string = string.replace('(', r'\(')
string = string.replace(')', r'\)') string = string.replace(')', r'\)')
# get rid of ciando id
regularexp = re.compile(r'http://www.ciando.com/index.cfm/intRefererID/\d{5}')
if regularexp.match(string): return ('http://www.ciando.com')
return string return string
def serialize_object(self, obj): def serialize_object(self, obj):
@@ -1566,5 +1757,6 @@ def gui_main():
if __name__ == '__main__': if __name__ == '__main__':
# sys.exit(cli_main()) if len(sys.argv) > 1:
sys.exit(cli_main())
sys.exit(gui_main()) sys.exit(gui_main())