{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### https://www.reddit.com/r/learnpython/comments/74u1bz/using_pako_deflate_with_python/" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "import gzip\n", "import json\n", "import base64\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "from clustergrammer2 import net" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "# these are two functions that save and load data in json\n", "\n", "# save dict to json\n", "def save_to_json(inst_dict, filename, indent=True):\n", " import json\n", "\n", " # save as a json\n", " fw = open(filename, 'w')\n", " if indent == True:\n", " fw.write( json.dumps(inst_dict, indent=2) )\n", " else:\n", " fw.write( json.dumps(inst_dict) )\n", " fw.close()\n", "\n", "\n", "# load json to dict\n", "def load_to_dict( filename ):\n", " import json\n", " # load\n", " f = open(filename,'r')\n", " inst_dict = json.load(f)\n", " f.close()\n", " return inst_dict" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load large Viz-JSON and save Compressed Version" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "# json.dumps(inst_dict)\n", "# viz_json = load_to_dict('data/big_data/mult_view.json')\n", "# viz_json = load_to_dict('data/big_data/mnist.json')\n", "viz_json = load_to_dict('data/big_data/ccle_1000x1037.json')\n", "# viz_json = load_to_dict('data/big_data/ccle_7000x1037.json')" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "save_to_json(viz_json, 'data/big_data/uncompressed.json')" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "\"{'mat': [[-0.657802683073748, \"" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "str(viz_json)[:30]" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "b'{\"mat\": [[-0.657802683073748, '\n" ] } ], "source": [ "viz_json_str = json.dumps(viz_json, ensure_ascii=False).encode('utf-8')\n", "print(viz_json_str[:30])\n", "zipped = gzip.compress(viz_json_str)\n", "zb64 = base64.b64encode(zipped)\n", "# zb64_json = {'something':str(zb64)}\n", "zb64_json = {'compressed': zb64.decode(\"utf-8\")}\n", "zb64_json_str = json.dumps(zb64_json)\n", "save_to_json(zb64_json_str, 'data/big_data/compressed.json')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load Large gzipped tsv, round to two decimals, cluster, saved compressed json" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "df = pd.read_csv('data/big_data/CCLE.txt.gz', compression='gzip', index_col=0)\n", "\n", "from ast import literal_eval as make_tuple\n", "cols = df.columns.tolist()\n", "new_cols = [make_tuple(x) for x in cols]\n", "df.columns = new_cols" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(18874, 1037)" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [], "source": [ "net.load_df(df)\n", "net.filter_N_top(inst_rc='row', N_top=1000, rank_type='var')\n", "net.normalize(axis='row', norm_type='zscore')\n", "df_z = net.export_df().round(2)" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "net.load_df(df_z)\n", "# net.filter_N_top(inst_rc='row', N_top=1000, rank_type='var')\n", "# net.normalize(axis='row', norm_type='zscore')\n", "net.cluster()\n", "net.write_json_to_file('viz', 'data/big_data/round_uncompressed.json')" ] }, { "cell_type": "code", "execution_count": 46, 
"metadata": {}, "outputs": [], "source": [ "# viz_json = net.export_net_json()" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "# save_to_json(viz_json, 'data/big_data/round_uncompressed.json')" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "viz_json = load_to_dict('data/big_data/round_uncompressed.json')" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "b'{\"row_nodes\": [{\"name\": \"KRT19'\n" ] } ], "source": [ "viz_json_str = json.dumps(viz_json, ensure_ascii=False).encode('utf-8')\n", "print(viz_json_str[:30])\n", "zipped = gzip.compress(viz_json_str)\n", "zb64 = base64.b64encode(zipped)\n", "# zb64_json = {'something':str(zb64)}\n", "zb64_json = {'compressed': zb64.decode(\"utf-8\")}\n", "zb64_json_str = json.dumps(zb64_json)\n", "save_to_json(zb64_json_str, 'data/big_data/round_compressed.json')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "my_dict = {'test': 'something!!!!!!!!'}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = json.dumps(my_dict, ensure_ascii=False).encode('utf-8')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### gzip will naturally produce a byte string" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "zipped = gzip.compress(data)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "zipped" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### which b64encode can naturally use to produce the base64 byte string" ] }, { "cell_type": "code", 
"execution_count": null, "metadata": {}, "outputs": [], "source": [ "tmp = base64.b64encode(zipped)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tmp" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "zipped" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "gzip.decompress(zipped)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# data = json.dumps(viz_json, ensure_ascii=False).encode('utf-8')\n", "# zipped = gzip.compress(data)\n", "# viz_json_b64 = base64.b64encode(zipped)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "tmp_json = {'something':'something'}\n", "json.dumps(tmp_json)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "tmp_json = {'something':str(viz_json_b64)}\n", "tmp_json_str = json.dumps(tmp_json)\n", "# tmp_json = {'something':zipped}" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": false }, "outputs": [], "source": [ "# comp_json = {'compressed':tmp}\n", "# data = json.dumps(comp_json, ensure_ascii=False).encode('utf-8')\n", "# # zipped = gzip.compress(data)\n", "# # tmp = base64.b64encode(zipped)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load Real File" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from clustergrammer import *\n", "net = Network()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "net.load_file('data/txt/rc_two_cats.txt')\n", "net.cluster()\n", "viz_json = net.export_net_json()" ] }, 
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "viz_json" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "save_to_json(viz_json, 'data/big_data/uncompressed.json')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Save compressed JSON" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# viz_json_str" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "viz_json_str = json.dumps(viz_json, ensure_ascii=False).encode('utf-8')\n", "zipped = gzip.compress(viz_json_str)\n", "zb64 = base64.b64encode(zipped)\n", "# zb64_json = {'something':str(zb64)}\n", "zb64_json = {'compressed': zb64.decode(\"utf-8\")}\n", "zb64_json_str = json.dumps(zb64_json)\n", "save_to_json(zb64_json_str, 'data/big_data/compressed.json')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "type(zb64)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# zb64_json" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# zb64.decode(\"utf-8\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Load large Viz-JSON" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# json.dumps(inst_dict)\n", "# viz_json = load_to_dict('data/big_data/mult_view.json')\n", "# viz_json = load_to_dict('data/big_data/mnist.json')\n", "viz_json = load_to_dict('data/big_data/ccle_1000x1037.json')\n", "# viz_json = load_to_dict('data/big_data/ccle_7000x1037.json')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "str(viz_json)[:30]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "viz_json_str = json.dumps(viz_json, 
ensure_ascii=False).encode('utf-8')\n",
 "zipped = gzip.compress(viz_json_str)\n",
 "zb64 = base64.b64encode(zipped)\n",
 "# Decode the base64 bytes to text; str(zb64) would embed the b'...' repr.\n",
 "zb64_json = {'compressed': zb64.decode(\"utf-8\")}\n",
 "zb64_json_str = json.dumps(zb64_json)\n",
 "# save_to_json calls json.dumps itself, so it is given the dict. Passing\n",
 "# zb64_json_str wrote a doubly-encoded JSON string literal, not an object.\n",
 "save_to_json(zb64_json, 'data/big_data/compressed.json')"
] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "zb64_json.keys()" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }
], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }