"""
This script generates the exif-mapping.php file by scraping the list of EXIF tags
at the ExifTool library website located at:

http://owl.phy.queensu.ca/~phil/exiftool/TagNames/EXIF.html
"""

import urllib2
import re
import pprint

print('start')

url = 'http://owl.phy.queensu.ca/~phil/exiftool/TagNames/EXIF.html'


def _php_single_quoted(text):
    """Return *text* wrapped as a PHP single-quoted string literal.

    Backslashes and single quotes are backslash-escaped so that scraped
    text cannot terminate the literal early in the generated file.
    """
    return "'" + text.replace('\\', '\\\\').replace("'", "\\'") + "'"


# Load the page. urllib2 responses are not context managers, so the handle
# is closed explicitly even when read() raises.
response = urllib2.urlopen(url)
try:
    page = response.read()
finally:
    response.close()

# Pattern: two adjacent table cells. The first cell (carrying a title
# attribute) is captured as the tag's hex code, the second as the tag name.
p = re.compile(r"""
<td\s*title=[^>]*>(.*?)</td> # hex code
\s*
<td>(?:<u>)?(.*?)\s*< # name is sometimes inside <u></u> tag
""", re.VERBOSE)

# Search for occurrences; each match is a (hex code, name) tuple.
m = p.findall(page)

# Pretty print found values
pprint.pprint(m)

# Print count
print('count: %s' % len(m))

# Generate the PHP file
with open('exif-mapping.php', 'w') as f:
    f.write('<?php\n')
    f.write('// This file was generated by collectexif.py\n\n')
    f.write('$this->EXIF_MAPPING = array(\n')
    for key, value in m:
        # The key is emitted as a bare PHP literal (e.g. a hex number);
        # the name is emitted as an escaped single-quoted string.
        f.write("    {} => {},\n".format(key, _php_single_quoted(value)))
    # Terminate the statement explicitly rather than relying on the
    # semicolon implied by the closing tag.
    f.write(');\n')
    f.write('?>')

# Done
print('done')
