Hackvent_2018/Teaser/_ZOoxjUSe1OVB7OPoVrsX.pdf.extracted/punch_card_decoder.py
Roman Hergenreder aa56104b3f Progress
2018-12-07 13:52:13 +01:00

342 lines
16 KiB
Python

#!/usr/bin/env python
#
# punchcard.py
#
# Copyright (C) 2011: Michael Hamilton
# The code is GPL 3.0(GNU General Public License) ( http://www.gnu.org/copyleft/gpl.html )
#
# import Image
from PIL import Image
import sys
from optparse import OptionParser
CARD_COLUMNS = 80
CARD_ROWS = 12
# found measurements at http://www.quadibloc.com/comp/cardint.htm
CARD_WIDTH = 7.0 + 3.0/8.0 # Inches
CARD_HEIGHT = 3.25 # Inches
CARD_COL_WIDTH = 0.087 # Inches
CARD_HOLE_WIDTH = 0.055 # Inches IBM, 0.056 Control Data
CARD_ROW_HEIGHT = 0.25 # Inches
CARD_HOLE_HEIGHT = 0.125 # Inches
CARD_TOPBOT_MARGIN = 3.0/16.0 # Inches at top and bottom
CARD_SIDE_MARGIN = 0.2235 # Inches on each side
CARD_SIDE_MARGIN_RATIO = CARD_SIDE_MARGIN/CARD_WIDTH # as proportion of card width (margin/width)
CARD_TOP_MARGIN_RATIO = CARD_TOPBOT_MARGIN/CARD_HEIGHT # as proportion of card height (margin/height)
CARD_ROW_HEIGHT_RATIO = CARD_ROW_HEIGHT/CARD_HEIGHT # as proportion of card height - works
CARD_COL_WIDTH_RATIO = CARD_COL_WIDTH/CARD_WIDTH # as proportion of card height - works
CARD_HOLE_HEIGHT_RATIO = CARD_HOLE_HEIGHT/CARD_HEIGHT # as proportion of card height - works
CARD_HOLE_WIDTH_RATIO = CARD_HOLE_WIDTH/CARD_WIDTH # as a proportion of card width
BRIGHTNESS_THRESHOLD = 200 # pixel brightness value (i.e. (R+G+B)/3)
IBM_MODEL_029_KEYPUNCH = """
/&-0123456789ABCDEFGHIJKLMNOPQR/STUVWXYZ:#@'="`.<(+|!$*);^~,%_>? |
12 / O OOOOOOOOO OOOOOO |
11| O OOOOOOOOO OOOOOO |
0| O OOOOOOOOO OOOOOO |
1| O O O O |
2| O O O O O O O O |
3| O O O O O O O O |
4| O O O O O O O O |
5| O O O O O O O O |
6| O O O O O O O O |
7| O O O O O O O O |
8| O O O O OOOOOOOOOOOOOOOOOOOOOOOO |
9| O O O O |
|__________________________________________________________________|"""
translate = None
if translate == None:
translate = {}
# Turn the ASCII art sideways and build a hash look up for
# column values, for example:
# (O, , ,O, , , , , , , , ):A
# (O, , , ,O, , , , , , , ):B
# (O, , , , ,O, , , , , , ):C
rows = IBM_MODEL_029_KEYPUNCH[1:].split('\n');
rotated = [[ r[i] for r in rows[0:13]] for i in range(5, len(rows[0]) - 1)]
for v in rotated:
translate[tuple(v[1:])] = v[0]
#print translate
# generate a range of floats
def drange(start, stop, step=1.0):
r = start
while (step >= 0.0 and r < stop) or (step < 0.0 and r > stop):
yield r
r += step
# Represents a punchcard image plus scanned data
class PunchCard(object):
def __init__(self, image, bright=-1, debug=False, xstart=0, xstop=0, ystart=0, ystop=0, xadjust=0):
pass
self.text = ''
self.decoded = []
self.surface = []
self.debug = debug
self.threshold = 0
self.ymin = ystart
self.ymax = ystop
self.xmin = xstart
self.xmax = xstop
self.xadjust = xadjust
self.image = image
self.pix = image.load()
self._crop()
self._scan(bright)
# Brightness is the average of RGB values
def _brightness(self, pixel):
#print max(pixel)
return ( pixel[0] + pixel[1] + pixel[2] ) / 3
# For highlighting on the debug dump
def _flip(self, pixel):
return max(pixel)
# The search is started from the "crop" edges.
# Either use crop boundary of the image size or the valyes supplied
# by the command line args
def _crop(self):
self.xsize, self.ysize = image.size
if self.xmax == 0:
self.xmax = self.xsize
if self.ymax == 0:
self.ymax = self.ysize
self.midx = self.xmin + (self.xmax - self.xmin) / 2 + self.xadjust
self.midy = self.ymin + (self.ymax - self.ymin) / 2
# heuristic for finding a reasonable cutoff brightness
def _find_threshold_brightness(self):
left = self._brightness(self.pix[self.xmin, self.midy])
right = self._brightness(self.pix[self.xmax - 1, self.midy])
return min(left, right, BRIGHTNESS_THRESHOLD) - 10
vals = []
last = 0
for x in xrange(self.xmin,self.xmax):
val = self._brightness(self.pix[x, self.midy])
if val > last:
left = val
else:
break
last = val
for x in xrange(self.xmax,self.xmin, -1):
val = self._brightness(self.pix[x, self.midy])
if val > last:
right = val
else:
break
right = val
print left, right
return min(left, right,200)
for x in xrange(self.xmin,self.xmax):
val = self._brightness(self.pix[x, self.midy])
vals.append(val)
vals.sort()
last_val = vals[0]
biggest_diff = 0
threshold = 0
for val in vals:
diff = val - last_val
#print val, diff
if val > 127 and val < 200 and diff >= 5:
biggest_diff = diff
threshold = val
last_val = val
if self.debug:
print "Threshold diff=", biggest_diff, "brightness=", val
return threshold - 10
# Find the left and right edges of the data area at probe_y and from that
# figure out the column and hole vertical dimensions at probe_y.
def _find_data_horiz_dimensions(self, probe_y):
left_border, right_border = self.xmin, self.xmax - 1
for x in xrange(self.xmin, self.midx):
if self._brightness(self.pix[x, probe_y]) < self.threshold:
left_border = x
break
for x in xrange(self.xmax-1, self.midx, -1):
if self._brightness(self.pix[x, probe_y]) < self.threshold:
right_border = x
break
width = right_border - left_border
card_side_margin_width = int(width * CARD_SIDE_MARGIN_RATIO)
data_left_x = left_border + card_side_margin_width
#data_right_x = right_border - card_side_margin_width
data_right_x = data_left_x + int((CARD_COLUMNS * width) * CARD_COL_WIDTH/CARD_WIDTH)
col_width = width * CARD_COL_WIDTH_RATIO
hole_width = width * CARD_HOLE_WIDTH_RATIO
#print col_width
if self.debug:
# mark left and right edges on the copy
for y in xrange(probe_y - self.ysize/100, probe_y + self.ysize/100):
self.debug_pix[left_border if left_border > 0 else 0,y] = 255
self.debug_pix[right_border if right_border < self.xmax else self.xmax - 1,y] = 255
for x in xrange(1, (self.xmax - self.xmin) / 200):
self.debug_pix[left_border + x, probe_y] = 255
self.debug_pix[right_border - x, probe_y] = 255
return data_left_x, data_right_x, col_width, hole_width
# find the top and bottom of the data area and from that the
# column and hole horizontal dimensions
def _find_data_vert_dimensions(self):
top_border, bottom_border = self.ymin, self.ymax
for y in xrange(self.ymin, self.midy):
#print pix[midx, y][0]
if self._brightness(self.pix[self.midx, y]) < self.threshold:
top_border = y
break
for y in xrange(self.ymax - 1, self.midy, -1):
if self._brightness(self.pix[self.midx, y]) < self.threshold:
bottom_border = y
break
card_height = bottom_border - top_border
card_top_margin = int(card_height * CARD_TOP_MARGIN_RATIO)
data_begins = top_border + card_top_margin
hole_height = int(card_height * CARD_HOLE_HEIGHT_RATIO)
data_top_y = data_begins + hole_height / 2
col_height = int(card_height * CARD_ROW_HEIGHT_RATIO)
if self.debug:
# mark up the copy with the edges
for x in xrange(self.xmin, self.xmax-1):
self.debug_pix[x,top_border] = 255
self.debug_pix[x,bottom_border] = 255
if self.debug:
# mark search parameters
for x in xrange(self.midx - self.xsize/20, self.midx + self.xsize/20):
self.debug_pix[x,self.ymin] = 255
self.debug_pix[x,self.ymax - 1] = 255
for y in xrange(0, self.ymin):
self.debug_pix[self.midx,y] = 255
for y in xrange(self.ymax - 1, self.ysize-1):
self.debug_pix[self.midx,y] = 255
return data_top_y, data_top_y + col_height * 11, col_height, hole_height
def _scan(self, bright=-1):
if self.debug:
# if debugging make a copy we can draw on
self.debug_image = self.image.copy()
self.debug_pix = self.debug_image.load()
self.threshold = bright if bright > 0 else self._find_threshold_brightness()
#x_min, x_max, col_width = self._find_data_horiz_dimensions(image, pix, self.threshold, self.ystart, self.ystop)
y_data_pos, y_data_end, col_height, hole_height = self._find_data_vert_dimensions()
data = {}
# Chads are narrow so find then heuristically by accumulating pixel brightness
# along the row. Should be forgiving if the image is slightly wonky.
y = y_data_pos #- col_height/8
for row_num in xrange(CARD_ROWS):
probe_y = y + col_height if row_num == 0 else ( y - col_height if row_num == CARD_ROWS -1 else y ) # Line 0 has a corner missing
x_data_left, x_data_right, col_width, hole_width = self._find_data_horiz_dimensions(probe_y)
left_edge = -1 # of a punch-hole
for x in xrange(x_data_left, x_data_right):
# Chads are tall so we can be sure if we probe around the middle of their height
val = self._brightness(self.pix[x, y])
if val >= self.threshold:
if left_edge == -1:
left_edge = x
if self.debug:
self.debug_pix[x,y] = self._flip(self.pix[x,y])
else:
if left_edge > -1:
hole_length = x - left_edge
if hole_length >= hole_width * 0.75:
col_num = int((left_edge + hole_length / 2.0 - x_data_left) / col_width + 0.25)
data[(col_num, row_num)] = hole_length
left_edge = -1
if (self.debug):
# Plot where holes might be on this row
expected_top_edge = y - hole_height / 2
expected_bottom_edge = y + hole_height / 2
blue = 255 * 256 * 256
for expected_left_edge in drange(x_data_left, x_data_right - 1, col_width):
for y_plot in drange(expected_top_edge, expected_bottom_edge, 2):
self.debug_pix[expected_left_edge,y_plot] = blue
#self.debug_pix[x + hole_width/2,yline] = 255 * 256 * 256
self.debug_pix[expected_left_edge + hole_width,y_plot] = blue
for x_plot in drange(expected_left_edge, expected_left_edge + hole_width):
self.debug_pix[x_plot, expected_top_edge] = blue
self.debug_pix[x_plot, expected_bottom_edge] = blue
y += col_height
if self.debug:
self.debug_image.show()
# prevent run-a-way debug shows causing my desktop to run out of memory
raw_input("Press Enter to continue...")
self.decoded = []
# Could fold this loop into the previous one - but would it be faster?
for col in xrange(0, CARD_COLUMNS):
col_pattern = []
col_surface = []
for row in xrange(CARD_ROWS):
key = (col, row)
# avergage for 1/3 of a column is greater than the threshold
col_pattern.append('O' if key in data else ' ')
col_surface.append(data[key] if key in data else 0)
tval = tuple(col_pattern)
global translate
self.text += translate[tval] if tval in translate else '@'
self.decoded.append(tval)
self.surface.append(col_surface)
return self
# ASCII art image of card
def dump(self, id, raw_data=False):
print ' Card Dump of Image file:', id, 'Format', 'Raw' if raw_data else 'Dump', 'threshold=', self.threshold
print ' ' + '123456789-' * (CARD_COLUMNS/10)
print ' ' + '_' * CARD_COLUMNS + ' '
print '/' + self.text + '_' * (CARD_COLUMNS - len(self.text)) + '|'
for rnum in xrange(len(self.decoded[0])):
sys.stdout.write('|')
if raw_data:
for val in self.surface:
sys.stdout.write(("(%d)" % val[rnum]) if val[rnum] != 0 else '.' )
else:
for col in self.decoded:
sys.stdout.write(col[rnum] if col[rnum] == 'O' else '.')
print '|'
print '`' + '-' * CARD_COLUMNS + "'"
print ' ' + '123456789-' * (CARD_COLUMNS/10)
print ''
if __name__ == '__main__':
usage = """usage: %prog [options] image [image...]
decode punch card image into ASCII."""
parser = OptionParser(usage)
parser.add_option('-b', '--bright-threshold', type='int', dest='bright', default=-1, help='Brightness (R+G+B)/3, e.g. 127.')
parser.add_option('-s', '--side-margin-ratio', type='float', dest='side_margin_ratio', default=CARD_SIDE_MARGIN_RATIO, help='Manually set side margin ratio (sideMargin/cardWidth).')
parser.add_option('-d', '--dump', action='store_true', dest='dump', help='Output an ASCII-art version of the card.')
parser.add_option('-i', '--display-image', action='store_true', dest='display', help='Display an anotated version of the image.')
parser.add_option('-r', '--dump-raw', action='store_true', dest='dumpraw', help='Output ASCII-art with raw row/column accumulator values.')
parser.add_option('-x', '--x-start', type='int', dest='xstart', default=0, help='Start looking for a card edge at y position (pixels)')
parser.add_option('-X', '--x-stop', type='int', dest='xstop', default=0, help='Stop looking for a card edge at y position')
parser.add_option('-y', '--y-start', type='int', dest='ystart', default=0, help='Start looking for a card edge at y position')
parser.add_option('-Y', '--y-stop', type='int', dest='ystop', default=0, help='Stop looking for a card edge at y position')
parser.add_option('-a', '--adjust-x', type='int', dest='xadjust', default=0, help='Adjust middle edge detect location (pixels)')
(options, args) = parser.parse_args()
for arg in args:
image = Image.open(arg)
card = PunchCard(image, bright=options.bright, debug=options.display, xstart=options.xstart, xstop=options.xstop, ystart=options.ystart, ystop=options.ystop, xadjust=options.xadjust)
print card.text
if (options.dump):
card.dump(arg)
if (options.dumpraw):
card.dump(arg, raw_data=True)
# main(["old_school.jpg", "-y", "5", "-b", "124"])
# O2Z-H4PAL52Z4-3F19-HV18-0LD$-SCH0-0L1S-4W3S-0M3!-31-@&Q@33@ @@@2LC_B&@@T&@L&W@&@