From 68f0df96a9e2d1f296338307ce5e9d1c0bc085c2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Gijs=20Vermari=C3=ABn?=
Date: Mon, 1 Jul 2019 13:19:39 +0200
Subject: [PATCH] Upload the filter and install script

---
 README.md            |   1 +
 install.sh           |  15 ++++++
 ipynb_drop_output.py | 106 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 122 insertions(+)
 create mode 100644 README.md
 create mode 100644 install.sh
 create mode 100755 ipynb_drop_output.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..5a12704
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+A tool for scrubbing .ipynb files before each git command is applied to them
diff --git a/install.sh b/install.sh
new file mode 100644
index 0000000..f7d35ad
--- /dev/null
+++ b/install.sh
@@ -0,0 +1,15 @@
+# Install the ipynb output-stripping git filter for the current user.
+TARGETDIR=~/.scripts/
+SCRIPT=ipynb_drop_output.py
+# -p: do not fail when the directory already exists (e.g. on re-install).
+mkdir -p "$TARGETDIR"
+cp "./${SCRIPT}" "$TARGETDIR"
+chmod +x "$TARGETDIR$SCRIPT"
+touch ~/.gitattributes
+# Append (never overwrite) so existing global attributes survive, and only
+# add the rule when it is not already present.
+grep -qxF "*.ipynb filter=clean_ipynb" ~/.gitattributes || echo "*.ipynb filter=clean_ipynb" >> ~/.gitattributes
+git config --global core.attributesfile ~/.gitattributes
+git config --global filter.clean_ipynb.clean "$TARGETDIR$SCRIPT"
+git config --global filter.clean_ipynb.smudge cat
+echo "installed the ipynb filter"
diff --git a/ipynb_drop_output.py b/ipynb_drop_output.py
new file mode 100755
index 0000000..c296bd0
--- /dev/null
+++ b/ipynb_drop_output.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+
+"""
+Suppress output and prompt numbers in git version control.
+
+This script will tell git to ignore prompt numbers and cell output
+when looking at ipynb files if their metadata contains:
+
+    "git" : { "suppress_outputs" : true }
+
+The notebooks themselves are not changed.
+
+See also this blogpost: http://pascalbugnion.net/blog/ipython-notebooks-and-git.html.
+
+Usage instructions
+==================
+
+Save the following lines as a .sh file and run it to enable the filter
+--------------
+TARGETDIR=~/.scripts/
+SCRIPT=ipynb_drop_output.py
+mkdir $TARGETDIR
+cp ./${SCRIPT} $TARGETDIR
+chmod +x $TARGETDIR$SCRIPT
+touch ~/.gitattributes
+echo "*.ipynb filter=clean_ipynb" > ~/.gitattributes
+git config --global core.attributesfile ~/.gitattributes
+git config --global filter.clean_ipynb.clean $TARGETDIR$SCRIPT
+git config --global filter.clean_ipynb.smudge cat
+echo "installed the ipynb filter"
+
+To tell git to ignore the output and prompts for a notebook,
+open the notebook's metadata (Edit > Edit Notebook Metadata). A
+panel should open containing the lines:
+--------------
+Or by hand:
+1. Create ~/.scripts and copy ipynb_drop_output.py into it
+2. Give it execution permission with chmod +x ~/.scripts/ipynb_drop_output.py
+3. Create ~/.gitattributes and paste the following text into it: "*.ipynb filter=clean_ipynb"
+4. Execute the following commands:
+    git config --global core.attributesfile ~/.gitattributes
+    git config --global filter.clean_ipynb.clean ~/.scripts/ipynb_drop_output.py
+    git config --global filter.clean_ipynb.smudge cat
+Your outputs should now be filtered!
+
+    {
+        "name" : "",
+        "signature" : "some very long hash"
+    }
+
+Add an extra line so that the metadata now looks like:
+
+    {
+        "name" : "",
+        "signature" : "don't change the hash, but add a comma at the end of the line",
+        "git" : { "suppress_outputs" : true }
+    }
+
+You may need to "touch" the notebooks for git to actually register a change, if
+your notebooks are already under version control.
+
+Notes
+=====
+
+This script is inspired by http://stackoverflow.com/a/20844506/827862, but
+lets the user specify whether the output of a notebook should be suppressed
+in the notebook's metadata, and works for IPython v3.0.
import json
import sys


def strip_output_from_cell(cell):
    """Remove execution artefacts from one notebook cell, in place.

    - ``outputs`` is emptied (the key is kept).
    - ``prompt_number`` is removed entirely.
    - ``execution_count`` is reset to ``None`` (the key is kept).

    Cells that carry none of these keys are left untouched.

    :param cell: the cell dictionary as parsed from the notebook JSON.
    """
    if "outputs" in cell:
        cell["outputs"] = []
    cell.pop("prompt_number", None)
    if "execution_count" in cell:
        cell["execution_count"] = None


def _iter_cells(notebook):
    """Yield every cell of *notebook*, whichever layout it uses.

    Cells are looked up by structure rather than by the ``nbformat`` number:
    a top-level ``cells`` list and/or ``worksheets`` entries that each hold
    their own ``cells`` list. Missing keys simply contribute no cells.
    """
    for cell in notebook.get("cells") or []:
        yield cell
    for sheet in notebook.get("worksheets") or []:
        for cell in sheet.get("cells") or []:
            yield cell


def clean_notebook(notebook):
    """Strip outputs and prompt counters from every cell of *notebook*.

    Every notebook is cleaned unconditionally; the per-notebook
    ``git.suppress_outputs`` metadata flag is not consulted, and the
    ``metadata`` / ``nbformat`` keys are not required to be present.

    :param notebook: the parsed notebook (a dict); it is modified in place.
    :returns: the same *notebook* object, for convenience.
    """
    for cell in _iter_cells(notebook):
        strip_output_from_cell(cell)
    return notebook


def main(stdin=None, stdout=None):
    """Read a notebook from *stdin* and write the cleaned JSON to *stdout*.

    :param stdin: text stream to read from; defaults to ``sys.stdin``.
    :param stdout: text stream to write to; defaults to ``sys.stdout``.

    Input that is not a JSON object (for example an empty file) is passed
    through unchanged instead of aborting with a traceback.
    """
    stdin = sys.stdin if stdin is None else stdin
    stdout = sys.stdout if stdout is None else stdout

    raw = stdin.read()
    try:
        notebook = json.loads(raw)
    except ValueError:
        # json.JSONDecodeError derives from ValueError, so this also works on
        # interpreters that predate JSONDecodeError.
        stdout.write(raw)
        return
    if not isinstance(notebook, dict):
        stdout.write(raw)
        return

    # Keep the serialisation parameters stable: they determine the exact
    # bytes of the filtered output.
    json.dump(
        clean_notebook(notebook),
        stdout,
        sort_keys=True,
        indent=1,
        separators=(",", ": "),
    )


if __name__ == "__main__":
    main()