#!/usr/bin/env python
import json,fire,re
from pathlib import Path
import io
def is_export(cell):
    """Tell whether a notebook cell is flagged for export.

    A cell qualifies when it is a code cell whose first source line consists
    solely of an ``#export`` marker (case-insensitive, with optional
    whitespace before ``#``, between ``#`` and ``export``, and at the end).

    Args:
        cell: Notebook cell mapping providing the ``'cell_type'`` and
            ``'source'`` keys; ``'source'`` is indexed as a sequence of lines.

    Returns:
        ``True`` when the cell starts with the export marker, else ``False``.
    """
    if cell['cell_type'] != 'code':
        return False
    src = cell['source']
    # An empty cell has no first line to inspect, and a first line shorter
    # than '#export' (7 characters) cannot be the marker.
    if len(src) == 0 or len(src[0]) < 7:
        return False
    return re.match(r'^\s*#\s*export\s*$', src[0], re.IGNORECASE) is not None
def getSortedFiles(allFiles, upTo=None):
    '''Return the notebook files selected by `allFiles`, sorted by name.

    allFiles = True        : every '*.ipynb' file of the current directory
             = '*_*.ipynb' : only the files matching this glob pattern
    upTo     = None        : no upper limit
             = filter      : keep the files up to 'filter' included

    Sorting matters: it ensures that the notebooks are processed in the
    correct order.

    Returns a list of file names. When the glob matches nothing, a warning is
    printed and an empty list is returned.
    '''
    import glob
    ret = []
    # `== True` rather than plain truthiness, so that a non-empty pattern
    # string is not mistaken for "all files" (note that 1 compares equal too).
    if allFiles == True: ret = glob.glob('*.ipynb')  # noqa: E712
    if isinstance(allFiles, str): ret = glob.glob(allFiles)
    if not ret:
        print('WARNING: No files found')
        return ret
    if upTo is not None:
        limit = str(upTo)
        # A bare string comparison drops the limit notebook itself
        # ('10_x.ipynb' > '10'), so also keep the files whose leading id
        # (the part of the stem before the first '_') equals the limit.
        ret = [f for f in ret
               if str(f) <= limit or Path(f).stem.split('_')[0] == limit]
    return sorted(ret)
def notebook2script(fname=None, allFiles=None, upTo=None, fnameout=None):
    '''Find the cells starting with `#export` and put them into a new module.

    + fname: notebook to convert (used when `allFiles` is not set)
    + allFiles: True to convert all the notebooks of the folder, or a glob
      pattern selecting the notebooks to convert
    + upTo: convert the files up to the specified one included; enables
      `allFiles` when it was not given
    + fnameout: forwarded to `notebook2scriptSingle` as the output file name

    Examples:
    notebook2script --allFiles=True   # Parse all files
    notebook2script --allFiles=nb*   # Parse all files starting with nb*
    notebook2script --upTo=10   # Parse all files with (name<='10')
    notebook2script --allFiles=*_*.ipynb --upTo=10   # Parse all files with an '_' and (name<='10')
    notebook2script --fnameout='test_25.py'
    '''
    # initial checks
    if allFiles is None and upTo is not None:
        allFiles = True  # Enable allFiles if upTo is present

    if not allFiles:
        if fname is None:
            # Nothing to convert: report it and stop instead of passing None
            # on to the single-file converter.
            print('Should provide a file name')
            return
        notebook2scriptSingle(fname, fnameout)
        return

    print('Begin...')
    for f in getSortedFiles(allFiles, upTo):
        notebook2scriptSingle(f, fnameout)
    print('...End')
def notebook2scriptSingle(fname, *fname_out):
    """Find the cells starting with `#export` and put them into a new module.

    The module is written into an ``exp`` folder located next to the
    notebook; the folder is created when missing.

    Args:
        fname: Path of the notebook (``.ipynb`` JSON file) to convert.
        *fname_out: Optional output file name as first extra argument. When
            omitted or ``None``, the name is ``nb_<id>.py`` where ``<id>`` is
            the part of the notebook stem before the first ``_``.
    """
    fname = Path(fname)
    # `fname_out` is variadic and may therefore be empty.
    requested = fname_out[0] if fname_out else None
    if requested is None:
        out_name = f'nb_{fname.stem.split("_")[0]}.py'
    else:
        out_name = requested

    with open(fname, 'r', encoding="utf-8") as nb_file:
        main_dic = json.load(nb_file)
    code_cells = [c for c in main_dic['cells'] if is_export(c)]

    module = f'''
#################################################
### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
#################################################
# file to edit: {fname.name}

'''
    # Skip the first line of each cell (the `#export` marker) and separate
    # the cells with a blank line.
    module += ''.join(''.join(cell['source'][1:]) + '\n\n' for cell in code_cells)
    # remove trailing spaces
    module = re.sub(r' +$', '', module, flags=re.MULTILINE)

    out_dir = fname.parent/'exp'
    out_dir.mkdir(exist_ok=True)
    output_path = out_dir/out_name
    with io.open(output_path, "w", encoding="utf-8") as f:
        # Drop the two newlines left after the last cell (or after the header).
        f.write(module[:-2])
    print(f"Converted {fname} to {output_path}")
# Script entry point: hand notebook2script to python-fire, which turns its
# parameters into command-line arguments.
if __name__ == '__main__':
    fire.Fire(notebook2script)
I have been using this for more than a year, and I have just realized that I don’t have any blog entry about it.
Notebook2script
As you may know, the full fastai v2 has been written in notebooks.
This is quite impressive: these notebooks are documentation + code, and the fastai libraries are built from these notebooks.
I have been using one of the first versions of Jeremy Howard’s process; I am quite sure there have been many improvements since then.
Here is the source code for notebook2script.py
download
How to extract modules from notebooks
Mark cells to be exported in your notebook
notebook2script expects a keyword at the top of each cell to be exported. This keyword is #export.
#export
variable = 'This will be exported in a module'
#export
variable2 = 'This one as well'
variable3 = 'Not this one'
You got the idea
Export your module my_great_module
#generate py from ipynb
#code from Jeremy Howard (fastai v2)
#!python notebook2script.py "00D059_init_and_import.ipynb"
!python notebook2script.py --fnameout="my_great_module.py" "2021-09-29-nbdev-notebook2script.ipynb"
Converted 2021-09-29-nbdev-notebook2script.ipynb to exp/my_great_module.py
Exported module
If the subfolder exp doesn’t exist, it will be created automatically.
And my_great_module.py is created as well.
Here is the content generated.
!cat exp/my_great_module.py
#################################################
### THIS FILE WAS AUTOGENERATED! DO NOT EDIT! ###
#################################################
# file to edit: 2021-09-29-nbdev-notebook2script.ipynb
variable = 'This will be exported in a module'
variable2 = 'This one as well'
Python library needed: fire
import sys
!conda install --yes --prefix {sys.prefix} -c conda-forge fire
Collecting package metadata (current_repodata.json): done
Solving environment: done
==> WARNING: A newer version of conda exists. <==
current version: 4.10.1
latest version: 4.10.3
Please update conda by running
$ conda update -n base -c defaults conda
## Package Plan ##
environment location: /home/guillaume/anaconda3/envs/xgboost
added / updated specs:
- fire
The following packages will be UPDATED:
fire 0.2.1-py_0 --> 0.4.0-pyh44b312d_0
Preparing transaction: done
Verifying transaction: done
Executing transaction: done
Combine with pdoc to generate documentation
#code from Jeremy Howard (fastai v2)
#!python notebook2script.py "00D059_init_and_import.ipynb"
library_name = "dataprophet"
notebook_name = "00 - dataprophet library - dataprophet"
!python notebook2script.py --fnameout="{library_name}.py" "{notebook_name}.ipynb"
!pdoc --html --output-dir exp/html --force "exp/{library_name}.py"
Note the {} notation, which allows using IPython variables as arguments to bash commands.
Proper docstring
See the post “Autogenerate documentation from custom python classes”.