From af8d78c87d6605753922f42ab69270ef9f5a4cc7 Mon Sep 17 00:00:00 2001 From: Aleksander Nowinski Date: Fri, 4 Dec 2015 15:06:56 +0100 Subject: [PATCH 1/5] Added DM dump functionality. Fixed a bug that prevented files.json from being stored if a Google Drive file (without a proper download link) appeared in the file list. --- archive-slack.py | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/archive-slack.py b/archive-slack.py index e2413fa..df3681a 100755 --- a/archive-slack.py +++ b/archive-slack.py @@ -3,7 +3,7 @@ # $Id: archive-slack.py,v 1.6 2015/05/11 08:15:55 errror Exp $ # apt-get install python-anyjson -import httplib, anyjson, pprint, sys, os, getopt +import httplib, anyjson, pprint, sys, os, getopt, json # generic wrapper for slack api calls, error handling only basic def slackApi(function, args = {}): @@ -59,7 +59,8 @@ def getDMs(): # writes a json output file 'name.json' containing json serialization of 'data' def writeJson(name, data, subdir = "."): f = open(subdir+os.sep+name+'.json', 'w') - f.write(anyjson.serialize(data)) + f.write(json.dumps(data, sort_keys=True, indent=2)) +# f.write(anyjson.serialize(data)) f.close() # reads a json input file 'name.json' returning deserialized data @@ -171,15 +172,19 @@ def fetchFiles(files, oldfiles): infoprint(" "+f['name']) hcon = httplib.HTTPSConnection('slack-files.com') if not f.has_key('url_download'): + print("No 'url_download' in file: "+f['name']+"; skipping."); pprint.pprint(f) - hcon.request('GET', f['url_download'][23:]) - result = hcon.getresponse() - if result.status != 200: - print 'Error fetching file '+f['id']+' from '+f['url_download'] else: - out = open(outfilename, 'w') - out.write(result.read()) - out.close() + hcon.request('GET', f['url_download'][23:]) + result = hcon.getresponse() + if result.status != 200: + print 'Error fetching file '+f['id']+' from '+f['url_download'] + else: + out = open(outfilename, 'w') + out.write(result.read()) + out.close() + 
oldfiledict[f['id']]=f + return oldfiledict.values() def usage(exitcode): print "" @@ -222,9 +227,6 @@ def infoprint(text): if len(args) != 2: usage(1) token = args[1] - -quiet = False -verbose = False nopublic = False private = False @@ -265,4 +267,6 @@ def infoprint(text): writeJson('files', files) if private: infoprint("DMs") - fetchChannels(getDMs(), 'im', 'dms') + dms = getDMs() + writeJson("dms", dms) + fetchChannels(dms, 'im', 'dms') From 31c59eba4954e8516eb95d3e47471dcd6714b169 Mon Sep 17 00:00:00 2001 From: Aleksander Nowinski Date: Fri, 4 Dec 2015 15:15:11 +0100 Subject: [PATCH 2/5] Fixed unnecessary removal of verbose/quiet variables --- archive-slack.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/archive-slack.py b/archive-slack.py index df3681a..9627c48 100755 --- a/archive-slack.py +++ b/archive-slack.py @@ -227,6 +227,9 @@ def infoprint(text): if len(args) != 2: usage(1) token = args[1] + +quiet = False +verbose = False nopublic = False private = False From b49e5e78553299d75eea9c022f0231df9d847ced Mon Sep 17 00:00:00 2001 From: Aleksander Nowinski Date: Fri, 4 Dec 2015 15:20:14 +0100 Subject: [PATCH 3/5] An exporter which converts JSON files into HTML files, in a reasonable and human-readable form. Requires work on other message types and file handling. 
--- html/style.css | 50 ++++++++++++ json2html.py | 217 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 267 insertions(+) create mode 100644 html/style.css create mode 100644 json2html.py diff --git a/html/style.css b/html/style.css new file mode 100644 index 0000000..bb72366 --- /dev/null +++ b/html/style.css @@ -0,0 +1,50 @@ +body { + color: #555459; + font-family: "Helvetica Neue",Helvetica,"Segoe UI",Tahoma,Arial,sans-serif; + font-size: 16px; + line-height: 1rem; +} + +.message_user { + display: inline; +} + + +.message_content { +margin-left: 80px; +} + +.timestamp { + width: 80px; + /*margin-right: 2em;*/ + text-align: left; + position: absolute; + +} + +.message_user { +font-weight: 900; +} +.message { +padding-bottom: 2px; +padding-top: 1px; + +} + +.date_separator { + background: #fff none repeat scroll 0 0; + clear: both; + color: #2c2d30; + cursor: default; + font-family: Slack-Lato,appleLogo,sans-serif; + font-size: 0.9rem; + font-weight: 700; + line-height: 1rem; + margin: 1.2rem 0; + padding: 0; + position: relative; + text-align: center; + border-top: 1px solid black; + border-bottom: 1px solid black; + width: 90%; +} \ No newline at end of file diff --git a/json2html.py b/json2html.py new file mode 100644 index 0000000..2cd5187 --- /dev/null +++ b/json2html.py @@ -0,0 +1,217 @@ +#!/usr/bin/python +# apt-get install python-anyjson + + +import anyjson, pprint, sys, os, getopt, json, time + + + +# reads a json input file 'name.json' returning deserialized data +def readJson(name, subdir="."): + if os.path.isfile(subdir + os.sep + name + '.json'): + f = open(subdir + os.sep + name + '.json', 'r') + data = anyjson.deserialize(f.read()) + f.close() + return data + else: + return None + + +# writes a json output file 'name.json' containing json serialization of 'data' +def writeHTML(name, data, subdir="html"): + f = open(subdir + os.sep + name + '.html', 'w') + f.write(data) + f.close() + + +def itemName(item, users): + if 'name' in 
item: + return item['name'] + else: + return dmUserName(item, users) + + +# format a single message according to the rules +def formatMessage(message, users): + res = '
' + \ + time.strftime('%H:%M:%S', time.localtime(float(message['ts']))) + \ + '
' + userColor='' + if 'username' in message: + username = message['username'] + elif 'user' in message: + uid = message['user'] + username= user_name(uid, users) + if uid in users and 'color' in users[uid]: + userColor='style="color: #'+users[uid]['color']+'"' + else: + username = '????' + res += '
'+username + '
' + res += message['text'] + res += '
' + return res + + +# returns basic header for all generated html files +def htmlHeader(title): + res = ''' + + + + ''' + title + ''' + + + + ''' + return res + +# default footer. +def htmlFooter(): + return '''''' + +# formats a channel contents into a single html returned as string. +def prepareChannelContent(channel, users, item_type='channel'): + item_name=itemName(channel, users) + content = htmlHeader(item_name) + content += '

' + ('#' if item_type=='channel'else '') + item_name + '

\n' + messages = channel['messages'] + current_day = None + for message in messages: + mg_day = time.strftime('%Y%j', time.localtime(float(message['ts']))) + if (current_day != mg_day): + current_day = mg_day + content += '
' + time.strftime('%Y-%m-%d', + time.localtime(float(message['ts']))) + '
\n' + content += formatMessage(message, users) + content += '\n' + content += htmlFooter() + return content + + +def prepareGroupList(channels): + html='
    \n' + if channels: + channel_ids = sorted(channels.keys(), key=lambda key: channels[key]['name']) + for channel_id in channel_ids: + channel = channels[channel_id] + html += '
  • ' + channel['name'] + '
  • \n' + html += '
\n' + return html + + +#Human readable user name (include Slackbot and unknown cases) +def user_name(uid, users): + if uid in users: + return users[uid]['name'] + elif uid=='USLACKBOT': + return "Slackbot" + else: + return "Unknown ("+uid+")" + +def dmUserName(dm, users): + uid=dm['user'] + return user_name(uid, users) + +#prepare index.html with links to +def prepareTOC(channels, groups, dms, users): + html=htmlHeader('Slack dump') + if channels: + html += '

Channels

\n' + html += prepareGroupList(channels) + if groups: + html += '

Groups

\n' + html += prepareGroupList(groups) + if dms: + html += '

DMs

\n' + html += '
    \n' + dms_ids=sorted(dms.keys(), key=lambda key: dmUserName(dms[key], users).lower()) + for dm_id in dms_ids: + dm = dms[dm_id] + username = dmUserName(dm, users) + html += '
  • '+username+'
  • \n' + html += '
\n' + + html += htmlFooter() + return html + + +def verboseprint(text): + if verbose: + print(text) + #text.encode('ascii', 'ignore') + + +def infoprint(text): + if not quiet: + print(text) + #text.encode('ascii', 'ignore') + + + +def exportClass(items, users, type, dir): + for item in items: + verboseprint('Exporting '+type+' '+itemName(items[item], users)) + item_content = readJson(item, dir) + html = prepareChannelContent(item_content, users, type) + writeHTML(item, html) + return + +def usage(exitcode): + print("") + print("Usage: json2html.py [options] ") + print("") + print("Run this program in the root directory where previously archive-slack has been run.") + print("Options:") + print(" -h --help : print(this help") + print(" -q --quiet : no output except errors") + print(" -v --verbose : verbose output") + print("") + exit(exitcode) + +opts, args = getopt.gnu_getopt(sys.argv, 'hqv', ['help', 'quiet', 'verbose']) +# and a authentication token, given via cmdline arg, use https://api.slack.com/#auth to generate your own +if len(args) != 1: + usage(1) + +quiet = False +verbose = False + +for o, v in opts: + if o == '--help' or o == '-h': + usage(0) + elif o == '--quiet' or o == '-q': + quiet = True + elif o == '--verbose' or o == '-v': + verbose = True + else: + usage(1) + +# verbose overrides quiet +if verbose: + quiet = False + +#read channels +if not os.path.isdir("html"): + infoprint("Creating 'html' directory") + os.mkdir("html") +channels = readJson("channels") +groups = readJson("groups") +dms = readJson("dms") +users = readJson("users") +infoprint('Preparing index.html') +writeHTML("index", prepareTOC(channels, groups, dms, users)) +infoprint("\n\nPreparing CHANNELS:") +exportClass(channels, users, 'channel', 'channels') +if groups: + infoprint("\n\nPreparing GROUPS:") + exportClass(groups, users, 'group', 'groups') +if dms: + infoprint("\n\nPreparing DMs:") + exportClass(dms, users, 'dm', 'dms') + +# for channel in channels: +# channelContent = 
readJson(channel, "channels") +# verboseprint("Formatting channel: " + channelContent['name']); +# html = prepareChannelContent(channelContent, users) +# writeHTML(channel, html) +infoprint("Export complete.") From f78e20b0274469ad48b0c944c29106e47d1e9abc Mon Sep 17 00:00:00 2001 From: Aleksander Nowinski Date: Sun, 20 Dec 2015 17:08:58 +0100 Subject: [PATCH 4/5] Fixed problem with encoding causing fail with python2 Fixed problem with wrong css location, now css is copied to the directory created. Fixed problem with messages having no 'text' key, now they are skipped. --- json2html.py | 13 +++++++++++-- html/style.css => style.css | 0 2 files changed, 11 insertions(+), 2 deletions(-) mode change 100644 => 100755 json2html.py rename html/style.css => style.css (100%) diff --git a/json2html.py b/json2html.py old mode 100644 new mode 100755 index 2cd5187..c7177e5 --- a/json2html.py +++ b/json2html.py @@ -2,7 +2,7 @@ # apt-get install python-anyjson -import anyjson, pprint, sys, os, getopt, json, time +import anyjson, pprint, sys, os, getopt, json, time, shutil @@ -20,7 +20,7 @@ def readJson(name, subdir="."): # writes a json output file 'name.json' containing json serialization of 'data' def writeHTML(name, data, subdir="html"): f = open(subdir + os.sep + name + '.html', 'w') - f.write(data) + f.write(data.encode('utf-8')) f.close() @@ -32,7 +32,14 @@ def itemName(item, users): # format a single message according to the rules +# FIXME: should be improved for different message types def formatMessage(message, users): + #Quick and dirty workarount for no 'text' key. + if not 'text' in message: + print("Unexpected message, no 'text' key:") + print(json.dumps(data, sort_keys=True, indent=2).encode('utf-8')) + return "" + res = '
' + \ time.strftime('%H:%M:%S', time.localtime(float(message['ts']))) + \ '
' @@ -194,6 +201,8 @@ def usage(exitcode): if not os.path.isdir("html"): infoprint("Creating 'html' directory") os.mkdir("html") +#copying the style.css file: +shutil.copy2(sys.path[0]+os.sep+'style.css', 'html') channels = readJson("channels") groups = readJson("groups") dms = readJson("dms") diff --git a/html/style.css b/style.css similarity index 100% rename from html/style.css rename to style.css From aad35078f3eb6c5d567b621fdc469835056465a7 Mon Sep 17 00:00:00 2001 From: Aleksander Nowinski Date: Sun, 20 Dec 2015 17:30:55 +0100 Subject: [PATCH 5/5] Fixed wrong variable in the error message, which caused a failure on messages with no 'text' key. --- json2html.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/json2html.py b/json2html.py index c7177e5..20273ed 100755 --- a/json2html.py +++ b/json2html.py @@ -37,7 +37,7 @@ def formatMessage(message, users): #Quick and dirty workarount for no 'text' key. if not 'text' in message: print("Unexpected message, no 'text' key:") - print(json.dumps(data, sort_keys=True, indent=2).encode('utf-8')) + print(json.dumps(message, sort_keys=True, indent=2).encode('utf-8')) return "" res = '
' + \