#!/usr/bin/python
# -*- coding: utf-8 -*-
from urllib import urlencode
from urllib2 import urlopen
from base64 import encode
from hashlib import sha224
# Value returned by get_img() in place of a result when the download fails.
# (The opening and closing quotes previously did not match, a syntax error.)
ERROR_MSG = 'No image'
# Sample image URL fetched when this script is run.
IMG_URL = 'http://eur.i1.yimg.com/us.yimg.com/i/us/we/intl/26.gif'
def hash_img(img):
    """Return the SHA-224 digest of ``img`` as a hexadecimal string.

    ``img`` is the raw data to hash; it is passed unchanged to
    ``hashlib.sha224``.
    """
    return sha224(img).hexdigest()
def get_img(img_url):
    """Download ``img_url`` and describe the retrieved content.

    On success, returns a dict with three keys:
      'base64_img'   -- the response body encoded with the 'base64' codec
      'content-type' -- the value of the response's Content-Type header
      'hash'         -- hash_img() of the response body

    If anything goes wrong (download error, missing header, ...), the
    ERROR_MSG string is returned instead of a dict.
    """
    try:
        response = urlopen(img_url)
        try:
            img = response.read()
            content_type = response.info()['Content-Type']
        finally:
            # Release the connection even when reading the body fails.
            response.close()
        # NOTE: the 'base64' string codec is only available on Python 2.
        return {'base64_img': img.encode('base64'),
                'content-type': content_type,
                'hash': hash_img(img)}
    except Exception:
        # Still best-effort, but unlike a bare ``except:`` this lets
        # KeyboardInterrupt and SystemExit propagate to the caller.
        return ERROR_MSG
# Fetch the sample image and print the outcome. The parenthesised form
# prints exactly the same value as the bare print statement on Python 2.
print(get_img(IMG_URL))
Refactorings
No refactoring yet !
akaihola
October 9, 2008, October 09, 2008 11:22, permalink
You should probably be more specific when catching exceptions: the bare `except:` in get_img catches everything. Only catch exceptions which urllib2 is known to be able to throw in this case, and let other exceptions propagate back to the caller.
Whether to assign values to variables before constructing the dictionary is a matter of taste, since you're using the values only once.
See http://en.wikipedia.org/wiki/SHA for a comparison of different SHA algorithms. If there's no specific reason to do otherwise, I'd use either SHA-1 or MD5.
In case of an error, I'd prefer to return a similar dictionary with None as values. An additional 'error' key could be used to indicate the error (None = no error, string = error description).
No need to import base64.encode or urllib.urlencode.
Since the functions are not specific to images, I'd rename identifiers accordingly.
#!/usr/bin/python
# -*- coding: utf-8 -*-
from urllib2 import urlopen, URLError
from hashlib import sha1
# Sample URL that is fetched when this script is run.
IMG_URL = 'http://eur.i1.yimg.com/us.yimg.com/i/us/we/intl/26.gif'
# Text stored under the 'error' key by get_doc_info() when urlopen fails.
ERROR_MSG = 'Image not found'
def create_hash(data):
    """Return the SHA-1 digest of ``data`` as a hexadecimal string.

    ``data`` is the raw content to hash; it is passed unchanged to
    ``hashlib.sha1``.
    """
    return sha1(data).hexdigest()
def get_doc_info(url):
    """Download ``url`` and describe the retrieved document.

    Always returns a dict with the keys 'base64_data', 'content-type',
    'hash' and 'error', all initialised to None.

    On success, 'base64_data' holds the body encoded with the 'base64'
    codec, 'content-type' the response's Content-Type header and 'hash'
    the create_hash() digest of the body; 'error' stays None.

    If a URLError is raised while fetching, the data keys stay None and
    'error' is set to ERROR_MSG. Other exceptions propagate to the caller.
    """
    result = dict.fromkeys(('base64_data', 'content-type', 'hash', 'error'))
    try:
        response = urlopen(url)
        try:
            data = response.read()
            content_type = response.info()['Content-Type']
        finally:
            # Release the connection even when reading the body fails.
            response.close()
    except URLError:
        result['error'] = ERROR_MSG
    else:
        # NOTE: the 'base64' string codec is only available on Python 2.
        result.update({'base64_data': data.encode('base64'),
                       'content-type': content_type,
                       'hash': create_hash(data)})
    return result
# Fetch the sample document and print the resulting dict. The parenthesised
# form prints exactly the same value as the bare print statement on Python 2.
print(get_doc_info(IMG_URL))
a
November 29, 2011, November 29, 2011 09:11, permalink
XGBkaJSYnKCkqNDU2Nzg5OkNERUZHSElKU1RVVldYWVpjZGVmZ2hpanN0dXZ3eHl6g4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2drh4uPk5ebn6Onq8fLz9PX29/j5+gEAAwEBAQEBAQEBAQAAAAAAAAECAwQFBgcICQoLEQACAQIEBAMEBwUEBAABAncAAQIDEQQFITEGEkFRB2FxEyIygQgUQpGhscEJIzNS8BVictEKFiQ04SXxFxgZGiYnKCkqNTY3ODk6Q0RFRkdISUpTVFVWV1hZWmNkZWZnaGlqc3R1dnd4eXqCg4SFhoeIiYqSk5SVlpeYmZqio6Slpqeoqaqys7S1tre4ubrCw8TFxsfIycrS09TV1tfY2dri4+Tl5ufo6ery8/T19vf4+fr/2gAMAwEAAhEDEQA/AJ9Z1G8i1e5SO6mRFbAVXIA4oAqDVL//AJ/J/wDvs0AI2qX/APz+z/8Afw0ARNq2of8AP7cf9/DQBE2sakOl/cf9/DQBC2s6mP8AmIXP/f00Aen27FreMkkkqCfyoA4HXjjW7v8A3/6CgBdK0q41QS/Z3jXy8Z3kjrn0B9KALsnhS/WNmMtudoJwGbP/AKDQBzrGgCFzQBA5oA9etv8Aj1h/3B/KgDz7xA2Ndu/9/wDoKANrwSS0V+BycJ/7NQBkTaNq0MLyyW7qiKWY7xwB170ATW+j219oEt5aySm6hB3xkgjjk44z06UAQXuk29joEN3cySi7n5jiBAAHXJ4z0/mKAOdc0wPYbX/j1h/3F/lSA858RtjX7z/f/oKANvwM37nUSOoCfyagDAl1vUZY2jkvJWRgQyluCD2oA2PCG+zgvdTncpaIm0j++Rzx9On40AJ43geZbXU4XMlq6BR6LnkH8f6UAca7UwPZbX/j1h/3F/lSA8z8TNjxDej/AG/6CmBBYaveaasq2k3liXAf5Qc4zjqPc0AUi9AFqXV7yTTlsGmH2VMEIFUe/JAyaAE/tu/XTTp/ng2pBGxkU989SM9aQGU70Ae12v8Ax6w/7i/yoA8w8UK//CR3uEYjf2HsKYGXtk/55v8AkaAArJ/zzb8jQAwrJ/zzf8jQBGySn/lm/wD3yaQDfJlP/LN/++TQB7Zag/ZYeP4F/lQA9o0LElFJ9xQAvlR/880/75FAB5Uf/PNP++RQAeVH/wA80/75FAB5Uf8AzzT/AL5FMA8qP/nmn/fIoAcBxSA//9k=
Is this a correct way to get an image, get its content-type, encode it in base64 and compute a hash from it?
Should I assign 'img.encode('base64')', 'response.info()['Content-Type']' and 'hash_img(img)' to variables and then pass them as dictionary values?
Should I use a specific function to get the hash ?