# Copyright 2021-2022 MinusOne, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
# files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
# modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom
# the Software is furnished to do so, subject to the following conditions:
# 
# The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
# IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
import sys
import json
import boto3
from botocore import UNSIGNED
from botocore.config import Config
import time
from pebble import ProcessPool, ProcessExpired
from datetime import datetime
import traceback

from m1 import M1Client

# This UNSIGNED config is only here to access the m1-public bucket even if you haven't set up AWS credentials.
# If you're modifying this file to load data available in your own S3 bucket you should rely upon aws/boto3
# documentation to set up credentials correctly.
# Shared S3 client used by getS3Objects to list the data bucket.
S3_CLIENT = boto3.client("s3", config = Config(signature_version=UNSIGNED)) 
# Bucket that is listed by getFilesToLoad and registered by registerBucket.
DATA_BUCKET = "m1-public"

# Keys looked up in the JSON config files when constructing an M1Client.
# The "ip" entry is read with .get(), so it may be absent from a config file.
CONFIG_SERVER_KEY = "server"
CONFIG_USERNAME_KEY = "username"
CONFIG_PASSWORD_KEY = "password"
CONFIG_IP_KEY = "ip"

# Store type passed to addStore for the temporary "big" store.
BOOST_STORE_TYPE = "boost"

def getS3Objects(bucket, prefix=""):
	"""Yield every object listed in `bucket` whose key starts with `prefix`.

	Pages through list_objects_v2 using the continuation token until a page
	has no "Contents" or no "NextContinuationToken". Each yielded dict is the
	listing entry with an added "Bucket" key holding the bucket name.
	"""
	request = {"Bucket": bucket, "Prefix": prefix}
	while True:
		page = S3_CLIENT.list_objects_v2(**request)
		if "Contents" not in page:
			return
		for entry in page["Contents"]:
			# Tag the entry so consumers can rebuild the full s3:// path
			entry["Bucket"] = bucket
			yield entry
		if "NextContinuationToken" not in page:
			return
		request["ContinuationToken"] = page["NextContinuationToken"]

def doInPool(desc, generator, action):
	"""Apply `action` to every item from `generator()` using a process pool.

	desc      -- label used in the start/elapsed messages
	generator -- zero-argument callable returning the iterable of work items
	action    -- callable invoked with one work item at a time

	Results are discarded. An exception raised while fetching a result is
	printed together with its traceback and the loop carries on with the
	next result.
	"""
	began = datetime.now()
	print(f"Starting {desc}: {began}")
	with ProcessPool(max_workers=PUBLISH_POOL_SIZE) as pool:
		results = pool.map(action, generator()).result()

		# Driven by explicit next() calls so that one failing item does not
		# end the iteration the way an exception inside a for loop would.
		exhausted = False
		while not exhausted:
			try:
				next(results)
			except StopIteration:
				exhausted = True
			except Exception as problem:
				print("Problem: ", problem)
				print(traceback.format_exc())
	print(f"{desc} took:", (datetime.now() - began))

def publishFile(s3obj):
	"""Post one S3 object to the "/publish" endpoint and print start/finish times.

	s3obj -- dict with "Bucket" and "Key" entries, as yielded by getS3Objects
	"""
	s3file = "s3://{}/{}".format(s3obj["Bucket"], s3obj["Key"])
	began = datetime.now()
	print("Start:", s3file, began)

	# Both timestamp fields are declared with the "epoch" date format;
	# the rules are sent as a JSON string.
	dateRules = {field: {"dateFormat": "epoch"} for field in ("created_utc", "retrieved_on")}
	payload = {"s3file": s3file, "rules": json.dumps(dateRules)}

	# A client is built for this call from the module-level config
	# instead of reusing the shared ENV client.
	client = M1Client(config[CONFIG_SERVER_KEY], config[CONFIG_USERNAME_KEY], config[CONFIG_PASSWORD_KEY], config.get(CONFIG_IP_KEY))
	client.postRetryForever("/publish", payload)

	finished = datetime.now()
	print("Finish:", s3file, finished, finished - began)

def getFilesToLoad():
	"""Return a generator over the DATA_BUCKET objects under the "reddit/" prefix."""
	keyPrefix = "reddit/"
	return getS3Objects(bucket=DATA_BUCKET, prefix=keyPrefix)

def loadData():
	"""Publish every file returned by getFilesToLoad through the process pool."""
	doInPool(desc="data load", generator=getFilesToLoad, action=publishFile)

def toggleAutoCommit(store):
	"""Set the autocommit interval of `store` to 5 seconds, then to -1, then wait.

	NOTE(review): -1 presumably switches autocommit back off -- confirm against the API.
	"""
	for seconds in (5, -1):
		ENV.postRetryForever("/store/autocommit", {"store": store, "seconds": seconds})
	time.sleep(5) # wait for autocommit to catch up
	
def getRangesToReIndex():
	"""Yield (start, end) tuples in steps of 10000 until the "/next" value is reached.

	The upper bound is fetched again each time the current bound has been
	covered; the generator ends once a fetched bound is not above the
	position already reached.
	"""
	pageSize = 10000
	position = 0
	upperBound = int(ENV.getRetryForever("/next"))
	while position < upperBound:
		while position < upperBound:
			yield (position, position + pageSize)
			position += pageSize
		# Re-read the bound in case it moved while the pages above were handed out
		upperBound = int(ENV.getRetryForever("/next"))

def indexRange(startAndEnd, store="big"):
	"""Post one (start, end) range to the "/index" endpoint and print the elapsed time.

	startAndEnd -- two-item sequence: the "start" and "end" values to send
	store       -- store named in the request; defaults to "big"

	The request goes through a client built inside this call (the same
	approach publishFile uses) rather than through the module-level ENV.
	"""
	startTime = datetime.now()
	localEnv = M1Client(config[CONFIG_SERVER_KEY], config[CONFIG_USERNAME_KEY], config[CONFIG_PASSWORD_KEY], config.get(CONFIG_IP_KEY))
	localEnv.postRetryForever("/index", { "store" : store,
																				"start" : startAndEnd[0],
																				"end"   : startAndEnd[1]})
	print("Finish", startAndEnd[1], datetime.now() - startTime)

def indexData(store):
	"""Index every range from getRangesToReIndex into `store` using the process pool."""
	# Local import keeps this block self-contained; partial binds the target
	# store so the pool can still call the worker with a single range argument.
	from functools import partial
	doInPool("reindex", getRangesToReIndex, partial(indexRange, store=store))

def addStore(store, storeType, shards):
	"""Create `store` via the ops endpoint, add it to the environment, then query it.

	store     -- name of the store
	storeType -- value sent as "type"
	shards    -- value sent as "shards"

	The replica count is always 1.
	"""
	print(f"Adding store {store}")
	storeSpec = {"store": store, "type": storeType, "shards": shards, "replicas": 1}

	# The ops request carries the same fields plus the environment key
	OPS.postRetryForever("/env/store/create", {"env": ENVIRONMENT_KEY, **storeSpec})
	ENV.postRetryForever("/store/add", storeSpec)
	docCount(store) # to ensure it's up

def dropStore(store):
	"""Destroy `store` via the ops endpoint, then drop it from the environment."""
	print(f"Dropping store {store}")
	destroyRequest = {"env": ENVIRONMENT_KEY, "store": store}
	OPS.postRetryForever("/env/store/destroy", destroyRequest)
	dropRequest = {"store": store}
	ENV.postRetryForever("/store/drop", dropRequest)
	

def rescale(servers):
	"""Post a rescale request for this environment with the given `servers` value."""
	print(f"Rescaling to {servers}")
	request = {"env": ENVIRONMENT_KEY, "servers": servers}
	OPS.postRetryForever("/env/rescale", request)

def docCount(store):
	"""Return the "numFound" value of a "*" query run against `store`."""
	rawReply = ENV.postRetryForever("/query", {"q": "*", "store": store})
	reply = json.loads(rawReply)
	return reply["response"]["numFound"]

def printCount(store):
	"""Print the document count reported by docCount for `store`."""
	found = docCount(store)
	print(f"# docs in {store}:", found)

def tryBackup(store):
	"""Start one backup of `store` and poll until it is "completed" or "failed".

	Returns the entry for the new backup taken from the "/store/backup/list" reply.
	"""
	# perhaps you want a better name for your backup here
	backupName = "initial-" + str(docCount(store))
	created = json.loads(ENV.postRetryForever("/store/backup/create", {"store": store, "name": backupName}))
	print(created)
	while True:
		# TODO would be nice to have a /get endpoint
		listing = json.loads(ENV.getRetryForever("/store/backup/list"))
		byId = {entry["id"]: entry for entry in listing}
		current = byId[created["id"]]
		if current["status"] in ("completed", "failed"):
			return current
		time.sleep(2)

def backup(store):
	"""Back up `store`, retrying after every failed attempt, and return the backup id.

	Raises Exception if an attempt ends with a status other than
	"completed" or "failed".
	"""
	print("Starting backup:", datetime.now())
	began = datetime.now()

	while True:
		outcome = tryBackup(store)
		print(outcome)
		status = outcome["status"]
		if status == "completed":
			print("Backup took: {}s".format(datetime.now() - began))
			return outcome["id"]
		if status != "failed":
			raise Exception(f"Unanticipated backup result: {status}")
		print("Backup failed; retrying")

def readyToRestart(store):
	"""Return True when every "Node <store>..." entry in the health details says "Requires restart".

	Also returns True when no entry key starts with "Node <store>".
	"""
	# TODO should clean up the health structure, it's verbose and not convenient to use
	details = json.loads(ENV.getRetryForever("/health"))["stores"]["details"]
	nodePrefix = f"Node {store}"
	return all(
		not shardKey.startswith(nodePrefix) or details[shardKey]["message"] == "Requires restart"
		for shardKey in details
	)

def restore(store, backupId):
	"""Restore `backupId` into `store`, reboot its servers and wait for health to pass.

	Polls every 2 seconds, first until readyToRestart(store) is true and,
	after the reboot request, until the overall health status is "PASSED".
	"""
	began = datetime.now()
	print("Starting restore", began)
	ENV.postRetryForever("/store/backup/restore", {"store": store, "backup": backupId})

	while not readyToRestart(store):
		time.sleep(2)

	print("Restore complete, rebooting servers")
	OPS.postRetryForever("/env/reboot", {"env": ENVIRONMENT_KEY, "group": store})

	while json.loads(ENV.getRetryForever("/health"))["overall"]["status"] != "PASSED":
		time.sleep(2)

	print("Restore took: {}".format(datetime.now() - began))

def setSchema():
	"""Wipe the environment schema and add the contents of schema.json in its place.

	The file is read from the current working directory and sent as text.
	"""
	print("Setting schema")
	with open("schema.json") as schemaFile:
		schemaText = schemaFile.read()
	ENV.postRetryForever("/schema/wipe", {})
	addRequest = {"properties": schemaText}
	ENV.postRetryForever("/schema/add", addRequest)

def registerBucket():
	"""Register DATA_BUCKET with this environment through the ops endpoint."""
	request = {"env": ENVIRONMENT_KEY, "bucket": DATA_BUCKET}
	OPS.postRetryForever("/env/bucket/register", request)

def bulkLoadFromArchive(numShards):
	"""Load the archive into a temporary "big" store, then move it to "index" via backup/restore.

	numShards -- shard count used for both the temporary and the final store
	"""
	began = datetime.now()
	loadStore = "big"
	finalStore = "index"

	# Phase 1: scale up and load everything into the temporary store
	registerBucket()
	rescale(NUM_DATA_PROCESSING_SERVERS)
	addStore(loadStore, BOOST_STORE_TYPE, numShards)
	setSchema()
	loadData()
	toggleAutoCommit(loadStore)
	printCount(loadStore)

	# Phase 2: scale back down, back up the temporary store and drop it
	rescale(1)
	snapshotId = backup(loadStore)
	dropStore(loadStore)

	# Phase 3: restore the backup into the final store
	addStore(finalStore, "standard", numShards)
	setSchema() # TODO this is lame
	restore(finalStore, snapshotId)
	printCount(finalStore)
	print("Total time:", (datetime.now() - began))


# This could be used to create a short lived store for querying purposes or to resize an existing store that requires extra storage space
def createStoreFromDatalake(newStore, numShards):
	"""Index into a temporary "big" store, then move the result to `newStore` via backup/restore.

	newStore  -- name of the "standard" store that receives the restored backup
	numShards -- shard count used for both the temporary and the new store
	"""
	scratchStore = "big"

	# Build the data in the temporary store while scaled up
	rescale(NUM_DATA_PROCESSING_SERVERS)
	addStore(scratchStore, BOOST_STORE_TYPE, numShards)
	setSchema()
	indexData(scratchStore)
	printCount(scratchStore)

	# Scale back down, back up the temporary store and drop it
	rescale(1)
	snapshotId = backup(scratchStore)
	dropStore(scratchStore)

	# Restore the backup into the new store
	addStore(newStore, "standard", numShards)
	restore(newStore, snapshotId)
	printCount(newStore)
	# If this was done to replace an existing store you'd probably want to delete that old store now

# Everything down to the __main__ guard runs whenever this module is executed
# or imported: the argument check, both config loads and the constants below.
if len(sys.argv) != 4:
	raise Exception("Usage: python bulkload-sample.py <environment key> <ops config file> <environment config file>")

# Environment identifier sent as "env" in the OPS requests.
ENVIRONMENT_KEY = sys.argv[1]

# Client built from the ops config file; used for the "/env/..." requests.
with open(sys.argv[2]) as f:
	config = json.load(f)
	OPS = M1Client(config[CONFIG_SERVER_KEY], config[CONFIG_USERNAME_KEY], config[CONFIG_PASSWORD_KEY], config.get(CONFIG_IP_KEY))

# Client built from the environment config file. `config` is rebound here, so
# from this point on the module-level `config` holds the environment settings;
# publishFile and indexRange read it when they construct their own clients.
with open(sys.argv[3]) as f:
	config = json.load(f)
	ENV = M1Client(config[CONFIG_SERVER_KEY], config[CONFIG_USERNAME_KEY], config[CONFIG_PASSWORD_KEY], config.get(CONFIG_IP_KEY))

# Server count passed to rescale() before the load/index phase.
NUM_DATA_PROCESSING_SERVERS = 32
# max_workers for the process pool created in doInPool.
PUBLISH_POOL_SIZE = 80

if __name__ == "__main__":
	# Run the archive bulk load with a single shard.
	bulkLoadFromArchive(1)
