#!/usr/bin/env python
# encoding: utf-8
# PYTHON_ARGCOMPLETE_OK

# from __future__ imports must occur at the beginning of the file
from __future__ import unicode_literals
from __future__ import print_function
from __future__ import division

import os
import time
import io
import hashlib
import binascii

from . import const
from . import gvar
from . import util
from .util import (
	pdbg, pinfo, perr,
	getfilesize, getfilemtime_int,
	joinpath, formatex,
	jsonload, jsondump,
	removepath)

# module-level alias, so that util.pr can be called simply as pr() in this file
pr = util.pr

def convertbincache(info, key):
	''' Replace the binary hash stored at info[key] with its hexadecimal text form.

	'info' is modified in place; nothing happens when 'key' is not in 'info'.
	'''
	if key in info:
		hexhash = binascii.hexlify(info[key])
		# hexlify() returns bytes (on Python 3 these are not text and the json
		# module refuses to serialize them), so decode the hex digits to text
		if isinstance(hexhash, bytes):
			hexhash = hexhash.decode('ascii')
		info[key] = hexhash

# In the old pickle-based cache, the hashes (MD5, CRC32) were saved in binary format (bytes);
# now we have to pay the price and convert them to string format ...
# TODO: Rename
def stringifypickle(picklecache):
	''' Convert the binary hashes of every file record in 'picklecache' in place.

	'picklecache' maps a directory to a dict of {file name: info dict}.
	Each info dict is handed to convertbincache() for the hash keys below.
	'''
	# 'crc32' is deliberately left out: it is still stored as int (long),
	# which JSON supports, and which can't be hexlified
	hashkeys = ('md5', 'slice_md5')
	for files in picklecache.values():
		for info in files.values():
			for hashkey in hashkeys:
				convertbincache(info, hashkey)

# There is room for more space optimization (like using a tree structure),
# but it's not added at the moment. For now, the cache is just a simple JSON file
# (it used to be a pickle). SQLite might be better for portability.
# NOTE: file names are case-sensitive
class cached(object):
	''' Decorator caching the result of a file hash function.

	Results live in the class-level 'cache' dict, laid out as
	{absolute directory: {file name: {'size', 'mtime', <function name>: value}}},
	and are persisted to 'hashcachepath' through jsonload() / jsondump().
	A cached value is only reused while the file's size and mtime still match
	the recorded ones. All the state is class-level, so every decorated
	function shares the same cache.
	'''
	# False: always recompute (the results are still stored in the cache)
	usecache = True
	# print progress messages through pr()
	verbose = False
	# print cache hit / miss / merge details through pdbg()
	debug = False
	# the file the cache is loaded from / saved to
	hashcachepath = const.HashCachePath
	# the in-memory cache, see the class docstring for the layout
	cache = {}
	# set once loadcache() has successfully loaded the cache file
	cacheloaded = False
	# True when 'cache' holds changes not yet written by savecache()
	dirty = False
	# passed through to jsondump() when saving
	semaphore = None
	# we don't do cache loading / unloading here because it's a decorator,
	# and probably multiple instances are created for md5, crc32, etc.
	# it's a bit complex, and i thus don't have the confidence to do it in ctor/dtor
	def __init__(self, f):
		''' Wrap the hash function 'f', whose first argument is the file path. '''
		super(cached, self).__init__()
		self.f = f

	def __call__(self, *args, **kwargs):
		''' Return the hash of the file args[0], from the cache when possible.

		A cached value is used only if 'usecache' is set and the recorded size
		and mtime equal the file's current ones. Otherwise the wrapped function
		is called with the given arguments and its result is stored.
		'''
		assert len(args) > 0
		path = args[0] # the 'filename' parameter
		name = self.f.__name__
		dirname, filename = os.path.split(path)
		absdir = os.path.abspath(dirname)

		info = cached.cache.get(absdir, {}).get(filename)
		if info is not None \
			and name in info \
			and info['size'] == getfilesize(path) \
			and info['mtime'] == getfilemtime_int(path) \
			and cached.usecache:
			result = info[name]
			if cached.debug:
				pdbg("Cache hit for file '{}',\n{}: {}\nsize: {}\nmtime: {}".format(
					path, name,
					result,
					info['size'], info['mtime']))
			return result

		# cache miss: compute first, so that nothing is recorded if hashing fails
		result = self.f(*args, **kwargs)
		info = cached.cache.setdefault(absdir, {}).setdefault(filename, {})
		self._store(info, path, result)
		return result

	def _store(self, info, path, value):
		''' Record 'value' together with the current size and mtime of 'path' in 'info'.

		Marks the cache dirty, and saves it when at least
		const.CacheSavePeriodInSec seconds passed since gvar.last_cache_save.
		'''
		cached.dirty = True
		size = getfilesize(path)
		mtime = getfilemtime_int(path)
		if info.get('size') != size or info.get('mtime') != mtime:
			# the file changed since 'info' was recorded, so the hashes stored by
			# the other decorated functions are stale. drop them, otherwise
			# refreshing size / mtime below would make them look valid again
			info.clear()
		info['size'] = size
		info['mtime'] = mtime
		info[self.f.__name__] = value
		if cached.debug:
			situation = "Cache miss" if cached.usecache else "Storing cache"
			pdbg((situation + " for file '{}',\n{}: {}\nsize: {}\nmtime: {}").format(
				path, self.f.__name__,
				value,
				info['size'], info['mtime']))

		# periodically save to prevent loss in case of system crash
		now = time.time()
		if now - gvar.last_cache_save >= const.CacheSavePeriodInSec:
			if cached.debug:
				pdbg("Periodically saving Hash Cash")
			cached.savecache()
			gvar.last_cache_save = now

	# merge the 'fromc' cache into the 'to' cache.
	# 'keepto':
	#  - True to keep the entry in 'to' cache when conflicting
	#  - False to keep the entry from 'fromc' cache
	# return number of conflict entries found
	@staticmethod
	def mergeinto(fromc, to, keepto = True):
		''' Merge the file records of 'fromc' into 'to' (in place), return the conflict count. '''
		conflicts = 0
		for absdir in fromc:
			entry = fromc[absdir]
			toentry = to.setdefault(absdir, {})
			for file in entry:
				if file not in toentry:
					toentry[file] = entry[file]
					continue

				# the same file is recorded in both caches
				if cached.debug:
					msg = "Cache merge conflict for: '{}/{}', {}: {}.".format(absdir, file,
						"Keeping the destination value" if keepto else
						"Using the source value",
						toentry[file] if keepto else entry[file])
					pdbg(msg)
				if not keepto:
					toentry[file] = entry[file]
				conflicts += 1

		return conflicts

	@staticmethod
	def ishexchar(c):
		''' Return True if 'c' is within '0'-'9', 'a'-'f' or 'A'-'F'. '''
		return ('0' <= c <= '9') or ('a' <= c <= 'f') or ('A' <= c <= 'F')

	# pay the history debt ..., hashes were in binary format (bytes) in pickle
	@staticmethod
	def isbincache(cache):
		''' Return True if any 'md5' value in 'cache' contains a non-hex character. '''
		for absdir in cache:
			entry = cache[absdir]
			for file in entry:
				info = entry[file]
				if 'md5' in info:
					md5 = info['md5']
					# index 9 is skipped due to encrypt_md5(),
					# which replaces that position with a non-hex letter
					for c in md5[:9] + md5[10:]:
						if not cached.ishexchar(c):
							return True
		return False

	@staticmethod
	def loadcache(existingcache = None):
		''' Load the cache from 'hashcachepath', unless it has been loaded already.

		'existingcache' (optional) is merged into the loaded cache, the loaded
		entries win on conflicts. If loading fails, 'existingcache' becomes the
		cache. Returns 'cacheloaded'.
		'''
		# a fresh dict per call: a '{}' default would be one shared object,
		# which becomes the live (and mutated) cache when loading fails
		if existingcache is None:
			existingcache = {}

		# load cache even we don't use cached hash values,
		# because we will save (possibly updated) hash values
		if cached.cacheloaded: # no double-loading
			if cached.verbose:
				pr("Not loading Hash Cache since 'cacheloaded' is '{}'".format(cached.cacheloaded))
			return cached.cacheloaded

		if cached.verbose:
			pr("Loading Hash Cache File '{}'...".format(cached.hashcachepath))

		if not os.path.exists(cached.hashcachepath):
			if cached.verbose:
				pr("Hash Cache File '{}' not found, no caching".format(cached.hashcachepath))
			return cached.cacheloaded

		try:
			cached.cache = jsonload(cached.hashcachepath)
			# pay the history debt ...
			# TODO: Remove some time later when no-body uses the old bin format cache
			if cached.isbincache(cached.cache):
				pinfo("ONE TIME conversion for binary format Hash Cache ...")
				stringifypickle(cached.cache)
				pinfo("ONE TIME conversion finished")
			if existingcache: # not empty
				if cached.verbose:
					pinfo("Merging with existing Hash Cache")
				cached.mergeinto(existingcache, cached.cache)
			cached.cacheloaded = True
			if cached.verbose:
				pr("Hash Cache File loaded.")
		#except (EOFError, TypeError, ValueError, UnicodeDecodeError) as ex:
		except Exception as ex:
			# deliberately broad: a broken cache file must never stop the program
			perr("Fail to load the Hash Cache, no caching.\n{}".format(formatex(ex)))
			cached.cache = existingcache

		return cached.cacheloaded

	@staticmethod
	def savecache(force_saving = False):
		''' Write the cache to 'hashcachepath' if it is dirty or 'force_saving' is set.

		Returns True if the cache was saved. Failures are reported through
		perr() and leave the cache marked dirty.
		'''
		# even if we were unable to load the cache, we still save it.
		if not (cached.dirty or force_saving):
			if cached.verbose:
				pr("Skip saving Hash Cache since it has not been updated.")
			return False

		saved = False
		if cached.verbose:
			pr("Saving Hash Cache...")
		try:
			jsondump(cached.cache, cached.hashcachepath, cached.semaphore)
			if cached.verbose:
				pr("Hash Cache saved.")
			saved = True
			cached.dirty = False
		except Exception as ex:
			perr("Failed to save Hash Cache.\n{}".format(formatex(ex)))

		return saved

	@staticmethod
	def cleancache():
		''' Drop the cache entries of directories / files that no longer exist, then save. '''
		if cached.loadcache():
			# iterate over a snapshot of the keys: directories are deleted inside
			# the loop, which is an error while iterating the dict itself
			for absdir in list(cached.cache.keys()):
				if not os.path.exists(absdir):
					if cached.verbose:
						pr("Directory: '{}' no longer exists, removing the cache entries".format(absdir))
					cached.dirty = True
					del cached.cache[absdir]
					continue

				oldfiles = cached.cache[absdir]
				files = {}
				for f in oldfiles:
					#p = os.path.join(absdir, f)
					p = joinpath(absdir, f)
					if os.path.exists(p):
						files[f] = oldfiles[f]
					elif cached.verbose:
						pr("File '{}' no longer exists, removing the cache entry".format(p))

				# the cleaning must not depend on 'verbose', only the messages do
				if len(files) != len(oldfiles):
					cached.dirty = True
					cached.cache[absdir] = files
		cached.savecache()

	@staticmethod
	def remove(path):
		''' Remove the cache record of the file 'path', if there is one.

		The directory entry is removed as well once it becomes empty.
		'''
		dir, file = os.path.split(path)
		absdir = os.path.abspath(dir)
		entry = cached.cache.get(absdir)
		if entry is None or file not in entry:
			pdbg("Failed to delete cache: Path '{}' not found in cache.".format(path))
			return

		del entry[file]
		# the in-memory cache now differs from the saved one
		cached.dirty = True
		pdbg("Cache for '{}' removed.".format(path))
		if not entry:
			del cached.cache[absdir]
			pdbg("Empty directory '{}' in cache also removed.".format(absdir))

	@staticmethod
	def remove_path_and_cache(path):
		''' Remove 'path' through removepath(), and its cache record if it was a file.

		Returns the result of removepath().
		'''
		# must be checked before the removal: once 'path' is gone, isfile() is False
		# NOTE(review): assumes removepath() returning const.ENoError means 'path' was deleted
		wasfile = os.path.isfile(path)
		result = removepath(path)
		if result == const.ENoError and wasfile:
			cached.remove(path)
		return result

@cached
def md5(filename, slice = const.OneM):
	''' Return the MD5 hex digest of the file 'filename', passed through encrypt_md5().

	The file is read in binary mode, 'slice' bytes at a time.
	'''
	digest = hashlib.md5()
	with io.open(filename, 'rb') as stream:
		chunk = stream.read(slice)
		# an empty read marks the end of the file
		while chunk:
			digest.update(chunk)
			chunk = stream.read(slice)

	return encrypt_md5(digest.hexdigest())

def encrypt_md5(md5str):
	''' Return the obfuscated form of the MD5 hex digest string 'md5str'.

	The transformation:
	 - the four 8-character quarters are swapped pairwise (Q1 Q0 Q3 Q2)
	 - every hex digit is XOR-ed with the low 4 bits of its index
	 - the digit at index 9 is replaced by the letter at that offset from 'g',
	   so the result is no longer a pure hex string

	Raises ValueError for a non-hex character, and IndexError when fewer
	than 10 characters remain after the swap.
	'''
	swapped = md5str[8:16] + md5str[0:8] + md5str[24:32] + md5str[16:24]
	# '&' binds tighter than '^': the digit is XOR-ed with (15 & index)
	digits = ['{:x}'.format(int(c, 16) ^ (15 & i)) for i, c in enumerate(swapped)]
	digits[9] = chr(ord('g') + int(digits[9], 16))
	return ''.join(digits)

# slice md5 for baidu rapidupload
@cached
def slice_md5(filename):
	''' Return the MD5 hex digest of the first 256 * const.OneK bytes of 'filename'. '''
	with io.open(filename, 'rb') as stream:
		head = stream.read(256 * const.OneK)

	return hashlib.md5(head).hexdigest()

@cached
def crc32(filename, slice = const.OneM):
	''' Return the CRC32 of the file 'filename' as an unsigned 32-bit integer.

	The file is read in binary mode, 'slice' bytes at a time.
	'''
	# 0 is the starting value binascii.crc32() uses when none is given
	checksum = 0
	with io.open(filename, 'rb') as stream:
		chunk = stream.read(slice)
		while chunk:
			checksum = binascii.crc32(chunk, checksum)
			chunk = stream.read(slice)

	# mask, so that the result is non-negative on every Python version
	return checksum & 0xffffffff

# vim: tabstop=4 noexpandtab shiftwidth=4 softtabstop=4 ff=unix fileencoding=utf-8
