#!/usr/bin/env python

# This script must be placed in the Byteflow root directory so that the
# `settings` and `apps.*` imports below can be resolved.

import getopt, sys
import random
import xml.dom.minidom
import logging
from datetime import datetime, tzinfo, timedelta

from django.core.management import setup_environ
import settings
setup_environ(settings)
from apps.blog.models import Post
from apps.discussion.models import CommentNode
from django.contrib.contenttypes.models import ContentType
from django.core.exceptions import ObjectDoesNotExist
from django.contrib.auth.models import User

# Global user options (set by main() from the command line)
# When True, the *_gmt date fields of posts and comments are used ("-u").
utc_time = False
# When True, comments whose comment_approved is not "1" are skipped ("-a").
approved_comments_only = False

# Keep track of what we imported; incremented by process_item() and
# process_comment() and reported by main() once the import finishes.
loaded_users = 0
loaded_posts = 0
loaded_drafts = 0
loaded_comments = 0

def main():
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hi:avVu", ["help", "input=", "utc"])
    except getopt.GetoptError, err:
        # print help information and exit:
        print str(err) # will print something like "option -a not recognized"
        usage()
        sys.exit(2)
    input = None
    loglevel = logging.WARNING
    for o, a in opts:
        if o == "-v":
            loglevel = logging.INFO
        elif o == "-V":
            loglevel = logging.DEBUG
        elif o == "-u":
            globals()["utc_time"] = True
        elif o == "-a":
            globals()["approved_comments_only"] = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-i", "--input"):
            input = a
        else:
            assert False, "unhandled option"
    FORMAT = "%(message)s"
    logging.basicConfig(level=loglevel, format=FORMAT)
    if (input == None):
        logging.error("No input file provided.")
        usage()
        sys.exit(2)
    dom = xml.dom.minidom.parse(input)
    items = get_items(dom)
    for i in items:
        process_item(i)
    print "Success: Loaded %d users, %d posts (%d drafts), and %d comments." % (loaded_users,
                                                                                loaded_posts,
                                                                                loaded_drafts,
                                                                                loaded_comments)
    dom.unlink()

def usage():
    print """usage: ./WXRImporter.py [options] --input=[file]
Options:
-u    : use UTC time for posts and comments
-a    : only import approved comments
-v    : verbose output
-V    : more verbose
"""

def get_items(doc):
    """Collect the <item> elements found under rss/channel of a parsed DOM.

    Only direct children of the first <channel> element are considered;
    they are returned as a list in document order.
    """
    channel = get_first_element(get_first_element(doc, "rss"), "channel")
    return [node for node in channel.childNodes
            if node.nodeType == node.ELEMENT_NODE and node.localName == "item"]

def process_item(item):
    """Load an individual item/post into Byteflow, including comments.

    Reads the title, creator, content, status and post date from the given
    <item> element and saves a Post for it.  When no User with the creator's
    name exists, one is created with a random password.  Every child
    <comment> element is then handed to process_comment().  The module-level
    loaded_users / loaded_posts / loaded_drafts counters are updated.
    """
    global loaded_users, loaded_posts, loaded_drafts
    logging.info("--------------------")
    title = get_first_element_text(item, "title")
    author = get_first_element_text(item, "creator")
    content = get_first_element_text(item, "encoded")
    status = get_first_element_text(item, "status")
    # NOTE(review): this code used to call pdate.replace(tzinfo=utc) for the
    # GMT date, but datetime.replace() returns a new object and the result
    # was discarded, so dates have always been stored naive.  The no-op is
    # dropped rather than "fixed": whether the configured database backend
    # accepts timezone-aware datetimes cannot be told from this script --
    # confirm before attaching tzinfo.
    if utc_time:
        pdate = parse_date(get_first_element_text(item, "post_date_gmt"))
    else:
        pdate = parse_date(get_first_element_text(item, "post_date"))
    # Create the post
    post = Post()
    # Blank titles break the post admin screen
    if title == u"":
        logging.info("Making up a title for this post")
        title = "untitled-%s" % generate_password(5)
    post.name = title
    # Usernames are cut to 30 characters, the same limit process_comment()
    # applies when it creates users, so lookup and creation always agree.
    username = author[:30]
    try:
        post.author = User.objects.get(username=username)
    except ObjectDoesNotExist:
        logging.info("Creating user (%s) for post" % author)
        postuser = User()
        postuser.username = username
        postuser.set_password(generate_password(10))
        postuser.save()
        loaded_users += 1
        post.author = postuser
    post.text = content
    post.date = pdate
    if status == "draft":
        post.is_draft = True
        loaded_drafts += 1
    post.save()
    loaded_posts += 1
    logging.info("Created post: %s" % post.name)
    # Handle comments
    for c in item.childNodes:
        if c.nodeType == c.ELEMENT_NODE and c.localName == "comment":
            process_comment(post, c)

def _find_or_create_comment_user(author, author_email, author_url):
    """Return the User to attribute a comment to, creating one if needed.

    Lookup order: exact username + email match, then a unique email match,
    then a username match.  A new User with a random password is created
    only when none of those find anybody.
    """
    global loaded_users
    # New users are stored with the name cut to 30 characters, so the same
    # truncated value must be used for lookups; otherwise a long name would
    # never be found again and a second create would collide.
    username = author[:30]
    logging.debug("Looking up user: %s [email: %s]..." %
                  (author, author_email))
    try:
        user = User.objects.get(username=username, email=author_email)
    except ObjectDoesNotExist:
        pass
    else:
        logging.debug("Found user %s, email: %s" % (user.username,
                                                   user.email))
        return user
    # Only trust an email match when there is an email: a blank address
    # would otherwise match whichever single user happens to have none.
    if author_email:
        matchingusers = User.objects.filter(email=author_email)
        if len(matchingusers) == 1:
            return matchingusers[0]
    # Try to avoid integrity errors (username must be unique)
    matchingusers = User.objects.filter(username=username)
    if len(matchingusers) > 0:
        return matchingusers[0]
    logging.debug("User [%s] not found.  Creating..." % author)
    user = User()
    user.username = username
    user.site = author_url
    user.name = username
    # NOTE(review): the 30-character cut on the email is kept from the
    # original code; the actual field limit is not visible here -- confirm.
    user.email = author_email[:30]
    user.set_password(generate_password(10))
    user.save()
    loaded_users += 1
    logging.info("Created user: %s, email: %s" % (author, author_email))
    return user


def process_comment(post, comment):
    """Load a comment for a post into Byteflow.

    post    -- the saved Post the comment belongs to
    comment -- the <comment> DOM element to import

    Unapproved comments are skipped entirely (no user is created either)
    when the "-a" option is active.  Otherwise a CommentNode is saved and
    the module-level loaded_comments counter is incremented.
    """
    global loaded_comments
    approved = (get_first_element_text(comment, "comment_approved") == "1")
    # Don't create a comment or user if the comment is
    # unapproved, and "-a" option was used.
    if approved_comments_only and not approved:
        logging.debug("Ignoring unapproved comment")
        return
    author = get_first_element_text(comment, "comment_author")
    author_email = get_first_element_text(comment, "comment_author_email")
    author_url = get_first_element_text(comment, "comment_author_url")
    content = get_first_element_text(comment, "comment_content")
    # NOTE(review): this code used to call pub_date.replace(tzinfo=utc) for
    # the GMT date, but datetime.replace() returns a new object and the
    # result was discarded, so dates have always been stored naive.  The
    # no-op is dropped rather than "fixed": whether the configured database
    # backend accepts timezone-aware datetimes cannot be told from this
    # script -- confirm before attaching tzinfo.
    if utc_time:
        pub_date = parse_date(get_first_element_text(comment, "comment_date_gmt"))
    else:
        pub_date = parse_date(get_first_element_text(comment, "comment_date"))
    commentNode = CommentNode()
    commentNode.content_type = ContentType.objects.get(app_label="blog", model="post")
    commentNode.object = post
    commentNode.approved = approved
    commentNode.pub_date = pub_date
    commentNode.upd_date = pub_date
    commentNode.body = content
    commentNode.user = _find_or_create_comment_user(author, author_email,
                                                    author_url)
    commentNode.save()
    loaded_comments += 1
    logging.info("Created comment by %s" % author_email)

def parse_date(datestr):
    """Convert a WXR "YYYY-MM-DD HH:MM:SS" string into a datetime.

    When the string does not match that format, the current local time is
    returned instead and the problem is logged at debug level.
    """
    fallback = datetime.now()
    try:
        return datetime.strptime(datestr, "%Y-%m-%d %H:%M:%S")
    except ValueError:
        logging.debug("Couldn't parse date %s, using current time" % datestr)
        return fallback

def generate_password(size):
    """Generate a random numeric password with the given number of digits.

    size -- number of decimal digits to produce; 0 or less yields "".

    The digits come from random.SystemRandom (the operating system's
    entropy source) because the result is used as a real account password,
    for which the default pseudo-random generator is not suitable.
    """
    rng = random.SystemRandom()
    return "".join([rng.choice("0123456789") for _ in range(size)])

def get_first_element(dom, name):
    """Find the first direct child element of `dom` whose local name is `name`.

    Returns the matching element node, or the empty string "" when no
    direct child element has that local name.
    """
    for child in dom.childNodes:
        if child.nodeType != child.ELEMENT_NODE:
            continue
        if child.localName == name:
            return child
    return ""

def get_first_element_text(dom, name):
    """Return all text and CDATA directly under the first element named.

    dom  -- DOM node whose direct children are searched
    name -- local name of the element to look for

    Returns a unicode string; u"" when the element has no text or when no
    such element exists.
    """
    node = get_first_element(dom, name)
    # get_first_element() reports "not found" with a value that has no
    # childNodes attribute; treat that as an element without text instead
    # of failing with AttributeError on an incomplete export.
    children = getattr(node, "childNodes", ())
    return u"".join([n.data for n in children
                     if n.nodeType in (n.TEXT_NODE, n.CDATA_SECTION_NODE)])

# Shared timedelta constants.
ZERO = timedelta(0)
HOUR = timedelta(hours=1)


class UTC(tzinfo):
    """A tzinfo with a fixed zero offset and no daylight saving time."""

    def tzname(self, dt):
        """Return the zone name, always "UTC"."""
        return "UTC"

    def utcoffset(self, dt):
        """Return the offset from UTC, always zero."""
        return ZERO

    def dst(self, dt):
        """Return the daylight saving adjustment, always zero."""
        return ZERO


# Module-level UTC instance.
utc = UTC()

# Run the importer only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
