Using Jupyter (Experimental)

In [1]:
# The magic commands below allow reflecting the changes in an imported module without restarting the kernel.
%load_ext autoreload
%autoreload 2

# Make balsam and the modules it depends on importable.
# Any existing copy of an entry is removed before it is re-inserted at the
# front, so re-running this cell keeps the same precedence without piling up
# duplicate search-path entries.
import sys
for balsam_dir in ('/soft/datascience/Balsam/0.3.5.1/env/lib/python3.6/site-packages/',
                   '/soft/datascience/Balsam/0.3.5.1/'):
    while balsam_dir in sys.path:
        sys.path.remove(balsam_dir)
    sys.path.insert(0, balsam_dir)

# The balsam and postgresql executables must also be on PATH. (Misha suggests this may not be necessary)
import os
balsam_bin = '/soft/datascience/Balsam/0.3.5.1/env/bin/'
postgres_bin = '/soft/datascience/PostgreSQL/9.6.12/bin/'
path_entries = os.environ.get('PATH', '').split(os.pathsep)
# The balsam environment goes first so that it wins over other installations.
while balsam_bin in path_entries:
    path_entries.remove(balsam_bin)
path_entries.insert(0, balsam_bin)
# PostgreSQL goes last and is only added when it is not already listed.
if postgres_bin not in path_entries:
    path_entries.append(postgres_bin)
os.environ['PATH'] = os.pathsep.join(path_entries)

try:
    import balsam
except Exception as err:
    # Catch Exception rather than everything, so that KeyboardInterrupt and
    # SystemExit still propagate, and report the cause instead of hiding it.
    print('Cannot find balsam, make sure balsam is installed or it is available in Python search paths')
    print(f'Import failed with: {err!r}')

# Import widgets
from ipywidgets import interact, interactive
from ipywidgets import fixed, interact_manual 
from ipywidgets import Textarea, widgets, Layout, Accordion
from ipywidgets import VBox, HBox, Box, Text, BoundedIntText
In [13]:
from balsam.django_config.db_index import refresh_db_index
# Gather every database path reported by refresh_db_index() and list them
# together with the position they have in the collection.
databasepaths = list(refresh_db_index())
print(f'There are {len(databasepaths)} Balsam databases available.')
for i, db in enumerate(databasepaths):
    print(f'{i}: {db}')

@interact(db=list(enumerate(databasepaths)))
def activate_database(db=''):
    """
    Selects the Balsam database to work with by exporting its path through
    the BALSAM_DB_PATH environment variable, then echoes the stored value.
    Note: Once BALSAM_DB_PATH is set, you need to restart Jupyter kernel to change it again.
    Parameters
    ----------
    db: str, path of the database to write to BALSAM_DB_PATH
    """
    selected_path = db
    os.environ["BALSAM_DB_PATH"] = selected_path
    # Read the value back from the environment so the message shows what was stored.
    print(f'Selected database: {os.environ["BALSAM_DB_PATH"]}')
There are 7 Balsam databases available.
0: /lus/theta-fs0/projects/datascience/keceli/valence_balsam/balsamdb
1: /lus/theta-fs0/projects/datascience/keceli/balsam/nwchem_demo/db
2: /lus/theta-fs0/projects/datascience/keceli/balsam/simint/simint_db
3: /lus/theta-fs0/projects/datascience/keceli/balsam/balsamdb_general
4: /lus/theta-fs0/projects/datascience/keceli/balsam/jupyter_test
5: /lus/theta-fs0/projects/connectomics_aesp/balsam_database
6: /lus/theta-fs0/projects/datascience/keceli/container/scaling_test/connectomics
In [12]:
# If the Balsam server is not running (happens after Theta maintenance) you get:
# "OperationalError: could not connect to server: Connection refused"
# This exception is caught here and it tries to restart the server.

from balsam.core.models import ApplicationDefinition as App
from balsam.scripts import postgres_control

import traceback


def is_connection_error(exc):
    """Return True when the message of ``exc`` says the server could not be reached."""
    # The test has to run on the message text: an exception object is not a
    # container, so `'...' in exc` raises TypeError instead of answering.
    return 'could not connect to server' in str(exc)


# List the apps of the active database. When the lookup fails because the
# server cannot be reached, try to start the server; any other failure is
# reported with its traceback.
try:
    apps = App.objects.all()
    print(f'Found {len(apps)} apps in {os.environ["BALSAM_DB_PATH"]}:')
    for i,app in enumerate(apps):
        print(f'{i}: {app.name}')
except Exception as e:
    if is_connection_error(e):
        print('Exception caught. Could not connect to server.')
        print(f'Trying to restart the Balsam server {os.environ["BALSAM_DB_PATH"]} ...')
        try:
            postgres_control.start_main(os.environ["BALSAM_DB_PATH"])
        except Exception:
            print('Exception caught:')
            # format_exc() renders the exception being handled; the previous
            # e.with_traceback() call needs a traceback argument and raised
            # TypeError itself.
            print(traceback.format_exc())
    else:
        print('Exception caught:')
        print(traceback.format_exc())
Found 11 apps in /lus/theta-fs0/projects/datascience/keceli/valence_balsam/balsamdb:
0: valence
1: say-hello
2: simint_avx512
3: simint_avx2
4: simint_avx
5: simint_scalar
6: valence_avx512
7: valence_avx2
8: valence190705
9: valence190707
10: valence_scalar_190707
In [16]:
#apps = App.objects.all()
@interact(name='',executable='',checkexe=False,description='',preprocess='',postprocess='',saveapp=False)
def add_app(name, executable, description='', envscript='', preprocess='', postprocess='', checkexe=False,saveapp=False):
    """
    Builds a new app definition and optionally stores it in the balsam database.

    An empty, unsaved app is returned when the executable check fails or when
    an app with the same name already exists.
    Parameters
    ----------
    name: str, name of the app
    executable: str, path to the executable
    description: str, info about the app
    envscript: str, value stored in the envscript field of the app
    preprocess: str, path to the preprocessing script
    postprocess: str, path to the postprocessing script
    checkexe: boolean, True: check if executable is available
    saveapp: boolean, True: save app to the database
    Returns
    -------
    The app object (populated only when the name was not taken).
    """
    import shutil
    from balsam.core.models import ApplicationDefinition as App

    candidate = App()

    # Optional sanity check: give up early when the executable cannot be located.
    if checkexe:
        if not shutil.which(executable):
            print(f'{executable} is not found')
            return candidate
        print(f'{executable} is found')

    # Names must be unique; an existing name leaves the candidate untouched.
    if App.objects.filter(name=name).exists():
        print(f"An application named {name} already exists")
        return candidate

    field_values = {
        'name': name,
        'executable': executable,
        'description': description,
        'envscript': envscript,
        'preprocess': preprocess,
        'postprocess': postprocess,
    }
    for field, value in field_values.items():
        setattr(candidate, field, value)

    if saveapp:
        candidate.save()
        print(f'{candidate.name} added to the balsam database.')
    return candidate
In [16]:
# Not ready: still need to find out how to enter dictionaries (data, environ_vars) through widgets.
# The application choices are read from the database here so that this cell
# does not depend on a variable left behind by some other cell.
from balsam.core.models import ApplicationDefinition as App
appnames = [app.name for app in App.objects.all()]

# interact_manual only calls add_job when its button is clicked. With plain
# interact the function runs on display and on every widget change, and each
# of those runs would save another job.
@interact_manual(name='', workflow='', application=appnames, description='', args='', num_nodes=range(1,4394), ranks_per_node=range(1,256),cpu_affinity=['depth','none'],data={},environ_vars={})
def add_job(name, workflow, application, description='', args='', num_nodes=1, ranks_per_node=1,cpu_affinity='depth',data={},environ_vars={}):
    """
    Creates a new job and saves it to the balsam database.
    Parameters
    ----------
    name: str, name of the job
    workflow: str, workflow the job belongs to
    application: str, name of the app to run
    description: str, info about the job
    args: str, arguments stored in the args field of the job
    num_nodes: int, value stored in the num_nodes field
    ranks_per_node: int, value stored in the ranks_per_node field
    cpu_affinity: str, 'depth' or 'none'
    data: dict, user data stored with the job
    environ_vars: value stored in the environ_vars field
    """
    from balsam.launcher.dag import BalsamJob
    job                = BalsamJob()
    job.name           = name
    job.workflow       = workflow
    job.application    = application
    job.description    = description
    job.args           = args
    job.num_nodes      = num_nodes
    job.ranks_per_node = ranks_per_node
    job.cpu_affinity   = cpu_affinity
    # NOTE(review): environ_vars is forwarded unchanged; confirm which type
    # BalsamJob expects for this field.
    job.environ_vars   = environ_vars
    # Keep the caller's data (it used to be discarded); fall back to an empty
    # dict when nothing usable was supplied.
    job.data           = data or {}
    job.save()
    print(f'{job.name} added to the balsam database.')
In [5]:
#def print_job_info(id=''):
@interact(job_id='',show_output=False)
def get_job_info(job_id='',show_output=False):
    """
    Prints verbose job info for the single job whose id contains job_id.
    Parameters
    ----------
    job_id: str, Partial or full Balsam job id.
    show_output: boolean, True: also print the content of <working_directory>/<name>.out
    """
    from balsam.launcher.dag import BalsamJob as Job
    jobs = Job.objects.all().filter(job_id__contains=job_id)
    # Let the database count the matches instead of loading every matching job.
    nmatches = jobs.count()
    if nmatches == 1:
        thejob = jobs[0]
        print(thejob)
        if show_output:
            output = f'{thejob.working_directory}/{thejob.name}.out'
            # The file may be missing or unreadable; report that instead of
            # raising out of the widget callback.
            try:
                with open(output) as f:
                    out = f.read()
            except OSError as err:
                print(f'Cannot read output file {output}: {err}')
            else:
                print(f'Output file {output} content:')
                print(out)
    elif nmatches == 0:
        print('No matching jobs')
    else:
        print(f'{nmatches} jobs matched, enter full id.')
In [17]:
from balsam.launcher.dag import BalsamJob as Job
#for job in Job.objects.filter(state='JOB_FINISHED',workflow='wf_test_valence190705').all():
from balsam.core.models import ApplicationDefinition as App
# Choices offered by the job-list filters. 'ALL' means "do not filter".
allstates = [
    'ALL',
    'CREATED', 'AWAITING_PARENTS', 'READY', 'STAGED_IN', 'PREPROCESSED',
    'RUNNING', 'RUN_DONE', 'POSTPROCESSED', 'JOB_FINISHED',
    'RUN_TIMEOUT', 'RUN_ERROR', 'RESTART_READY', 'FAILED', 'USER_KILLED',
]

# Distinct workflow values found on the stored jobs, followed by 'ALL'.
distinct_workflow_rows = Job.objects.order_by().values('workflow').distinct()
allworkflows = [row['workflow'] for row in distinct_workflow_rows] + ['ALL']

# Names of all stored apps, followed by 'ALL'.
allapps = [definition.name for definition in App.objects.all()] + ['ALL']
@interact(state=allstates,workflow=allworkflows,app=allapps,name='')
def list_jobs(state='ALL',workflow='ALL',app='ALL',name=''):
    """
    Prints a table of the jobs that match the selected filters.

    The last column shows the runtime when state is 'JOB_FINISHED' and the
    job state when state is 'ALL'; for any other state it is omitted.
    Parameters
    ----------
    state: str, job state to select, 'ALL' disables this filter
    workflow: str, workflow to select, 'ALL' disables this filter
    app: str, application name to select, 'ALL' disables this filter
    name: str, case-insensitive part of the job name, empty disables this filter
    """
    jobs = Job.objects.all()
    # Let the database count instead of loading every job just to call len().
    print(f'Total number of jobs: {jobs.count()}')
    if state != 'ALL':
        jobs = jobs.filter(state=state)
    if workflow != 'ALL':
        jobs = jobs.filter(workflow=workflow)
    if app != 'ALL':
        jobs = jobs.filter(application=app)
    if name:
        jobs = jobs.filter(name__icontains=name)

    # Fetch the selection once; it is used for both the count and the table.
    selected = list(jobs)
    print(f'Selected number of jobs: {len(selected)}')
    if not selected:
        return

    show_runtime = state == 'JOB_FINISHED'
    show_state = state == 'ALL'

    t = '{:<20}'.format('Name')
    t += ' {:>8}'.format('Nodes')
    t += ' {:>12}'.format('Ranks')
    # Two leading spaces, like the rows below, so the last column lines up.
    t += '  {:^8}'.format('ID')
    if show_runtime:
        t += '{:>12}'.format('Runtime')
    elif show_state:
        t += '{:>15}'.format('State')
    print(t)

    for job in selected:
        s = '{:<20.15}'.format(job.name)
        s += ' {:>8}'.format(job.num_nodes)
        s += ' {:>12}'.format(job.num_ranks)
        # Only the first group of the id is shown to keep the table narrow.
        s += '  {:>8}'.format(str(job.job_id).split('-')[0])

        if show_runtime:
            runtime = job.runtime_seconds
            # NOTE(review): guards against a missing runtime, which cannot be
            # formatted as a float; confirm whether runtime_seconds can be None.
            if runtime is None:
                s += '{:>12}'.format('n/a')
            else:
                s += '{:>12.3f}'.format(runtime)
        elif show_state:
            s += '{:>15}'.format(job.state)
        print(s)
In [ ]: