Using Jupyter (Experimental)
In [1]:
Copied!
# The magic commands below allow reflecting the changes in an imported module without restarting the kernel.
%load_ext autoreload
%autoreload 2
# We need to add balsam and the modules it depends on to the Python search paths.
import sys
sys.path.insert(0,'/soft/datascience/Balsam/0.3.5.1/env/lib/python3.6/site-packages/')
sys.path.insert(0,'/soft/datascience/Balsam/0.3.5.1/')
# We also need balsam and postgresql to be in the path. (Misha suggests this may not be necessary)
import os
os.environ['PATH'] ='/soft/datascience/Balsam/0.3.5.1/env/bin/:' + os.environ['PATH']
os.environ['PATH'] +=':/soft/datascience/PostgreSQL/9.6.12/bin/'
try:
import balsam
except:
print('Cannot find balsam, make sure balsam is installed or it is available in Python search paths')
# Import widgets
from ipywidgets import interact, interactive
from ipywidgets import fixed, interact_manual
from ipywidgets import Textarea, widgets, Layout, Accordion
from ipywidgets import VBox, HBox, Box, Text, BoundedIntText
# The magic commands below allow reflecting the changes in an imported module without restarting the kernel.
%load_ext autoreload
%autoreload 2
# We need to add balsam and the modules it depends on to the Python search paths.
import sys
sys.path.insert(0,'/soft/datascience/Balsam/0.3.5.1/env/lib/python3.6/site-packages/')
sys.path.insert(0,'/soft/datascience/Balsam/0.3.5.1/')
# We also need balsam and postgresql to be in the path. (Misha suggests this may not be necessary)
import os
os.environ['PATH'] ='/soft/datascience/Balsam/0.3.5.1/env/bin/:' + os.environ['PATH']
os.environ['PATH'] +=':/soft/datascience/PostgreSQL/9.6.12/bin/'
try:
import balsam
except:
print('Cannot find balsam, make sure balsam is installed or it is available in Python search paths')
# Import widgets
from ipywidgets import interact, interactive
from ipywidgets import fixed, interact_manual
from ipywidgets import Textarea, widgets, Layout, Accordion
from ipywidgets import VBox, HBox, Box, Text, BoundedIntText
In [13]:
Copied!
import os

from balsam.django_config.db_index import refresh_db_index

# Collect whatever refresh_db_index() yields into a list and show it with an index.
databasepaths = list(refresh_db_index())
print(f'There are {len(databasepaths)} Balsam databases available.')
for i, db in enumerate(databasepaths):
    print(f'{i}: {db}')

# The options are (label, value) pairs: the dropdown shows the index and passes
# the matching database path to the function.
@interact(db=[(i,db) for i,db in enumerate(databasepaths)])
def activate_database(db=''):
    """
    Activates Balsam database by setting the BALSAM_DB_PATH environment variable.
    Note: Once BALSAM_DB_PATH is set, you need to restart Jupyter kernel to change it again.

    Parameters
    ----------
    db: str, path of the database to activate. An empty or missing value
        (for example when the dropdown has no entries) leaves the environment
        untouched.
    """
    if not db:
        # os.environ only accepts strings; an empty dropdown would hand us None.
        print('No database selected.')
        return
    os.environ["BALSAM_DB_PATH"] = db
    print(f'Selected database: {os.environ["BALSAM_DB_PATH"]}')
import os

from balsam.django_config.db_index import refresh_db_index

# Collect whatever refresh_db_index() yields into a list and show it with an index.
databasepaths = list(refresh_db_index())
print(f'There are {len(databasepaths)} Balsam databases available.')
for i, db in enumerate(databasepaths):
    print(f'{i}: {db}')

# The options are (label, value) pairs: the dropdown shows the index and passes
# the matching database path to the function.
@interact(db=[(i,db) for i,db in enumerate(databasepaths)])
def activate_database(db=''):
    """
    Activates Balsam database by setting the BALSAM_DB_PATH environment variable.
    Note: Once BALSAM_DB_PATH is set, you need to restart Jupyter kernel to change it again.

    Parameters
    ----------
    db: str, path of the database to activate. An empty or missing value
        (for example when the dropdown has no entries) leaves the environment
        untouched.
    """
    if not db:
        # os.environ only accepts strings; an empty dropdown would hand us None.
        print('No database selected.')
        return
    os.environ["BALSAM_DB_PATH"] = db
    print(f'Selected database: {os.environ["BALSAM_DB_PATH"]}')
There are 7 Balsam databases available. 0: /lus/theta-fs0/projects/datascience/keceli/valence_balsam/balsamdb 1: /lus/theta-fs0/projects/datascience/keceli/balsam/nwchem_demo/db 2: /lus/theta-fs0/projects/datascience/keceli/balsam/simint/simint_db 3: /lus/theta-fs0/projects/datascience/keceli/balsam/balsamdb_general 4: /lus/theta-fs0/projects/datascience/keceli/balsam/jupyter_test 5: /lus/theta-fs0/projects/connectomics_aesp/balsam_database 6: /lus/theta-fs0/projects/datascience/keceli/container/scaling_test/connectomics
interactive(children=(Dropdown(description='db', options=((0, '/lus/theta-fs0/projects/datascience/keceli/vale…
In [12]:
Copied!
# If the balsam server is not running (happens after theta maintenance) you get:
# "OperationalError: could not connect to server: Connection refused"
# This exception is caught here and it tries to restart the server.
import os
import traceback

from balsam.core.models import ApplicationDefinition as App
from balsam.scripts import postgres_control

try:
    apps = App.objects.all()
    print(f'Found {len(apps)} apps in {os.environ["BALSAM_DB_PATH"]}:')
    for i, app in enumerate(apps):
        print(f'{i}: {app.name}')
except Exception as e:
    # The message has to be compared as text: an exception object does not
    # support the "in" operator.
    if 'could not connect to server' in str(e):
        print('Exception caught. Could not connect to server.')
        print(f'Trying to restart the Balsam server {os.environ["BALSAM_DB_PATH"]} ...')
        try:
            postgres_control.start_main(os.environ["BALSAM_DB_PATH"])
        except Exception:
            print('Exception caught:')
            # format_exc() renders the traceback of the exception being handled.
            print(traceback.format_exc())
    else:
        print('Exception caught:')
        print(traceback.format_exc())
# If the balsam server is not running (happens after theta maintenance) you get:
# "OperationalError: could not connect to server: Connection refused"
# This exception is caught here and it tries to restart the server.
import os
import traceback

from balsam.core.models import ApplicationDefinition as App
from balsam.scripts import postgres_control

try:
    apps = App.objects.all()
    print(f'Found {len(apps)} apps in {os.environ["BALSAM_DB_PATH"]}:')
    for i, app in enumerate(apps):
        print(f'{i}: {app.name}')
except Exception as e:
    # The message has to be compared as text: an exception object does not
    # support the "in" operator.
    if 'could not connect to server' in str(e):
        print('Exception caught. Could not connect to server.')
        print(f'Trying to restart the Balsam server {os.environ["BALSAM_DB_PATH"]} ...')
        try:
            postgres_control.start_main(os.environ["BALSAM_DB_PATH"])
        except Exception:
            print('Exception caught:')
            # format_exc() renders the traceback of the exception being handled.
            print(traceback.format_exc())
    else:
        print('Exception caught:')
        print(traceback.format_exc())
Found 11 apps in /lus/theta-fs0/projects/datascience/keceli/valence_balsam/balsamdb: 0: valence 1: say-hello 2: simint_avx512 3: simint_avx2 4: simint_avx 5: simint_scalar 6: valence_avx512 7: valence_avx2 8: valence190705 9: valence190707 10: valence_scalar_190707
In [16]:
Copied!
@interact(name='',executable='',checkexe=False,description='',preprocess='',postprocess='',saveapp=False)
def add_app(name, executable, description='', envscript='', preprocess='', postprocess='', checkexe=False,saveapp=False):
    """
    Adds a new app to the balsam database.

    Parameters
    ----------
    name: str, name of the app
    executable: str, path to the executable
    description: str, info about the app
    envscript: str, stored on the app's envscript field
    preprocess: str, path to the preprocessing script
    postprocess: str, path to the postprocessing script
    checkexe: boolean, True: check if executable is available
    saveapp: boolean, True: save app to the database

    Returns
    -------
    The ApplicationDefinition object. Its fields are only filled in when no
    app with the same name exists, and it is only saved when saveapp is True
    and a name was given.
    """
    from balsam.core.models import ApplicationDefinition as App
    import shutil
    newapp = App()
    if checkexe:
        if shutil.which(executable):
            print('{} is found'.format(executable))
        else:
            print('{} is not found'.format(executable))
            return newapp
    if App.objects.filter(name=name).exists():
        print("An application named {} already exists".format(name))
    else:
        newapp.name = name
        newapp.executable = executable
        newapp.description = description
        newapp.envscript = envscript
        newapp.preprocess = preprocess
        newapp.postprocess = postprocess
        if saveapp:
            if not name:
                # The widget starts with an empty name box; never store an unnamed app.
                print('Cannot save an app without a name.')
            else:
                newapp.save()
                print(f'{newapp.name} added to the balsam database.')
    return newapp
@interact(name='',executable='',checkexe=False,description='',preprocess='',postprocess='',saveapp=False)
def add_app(name, executable, description='', envscript='', preprocess='', postprocess='', checkexe=False,saveapp=False):
    """
    Adds a new app to the balsam database.

    Parameters
    ----------
    name: str, name of the app
    executable: str, path to the executable
    description: str, info about the app
    envscript: str, stored on the app's envscript field
    preprocess: str, path to the preprocessing script
    postprocess: str, path to the postprocessing script
    checkexe: boolean, True: check if executable is available
    saveapp: boolean, True: save app to the database

    Returns
    -------
    The ApplicationDefinition object. Its fields are only filled in when no
    app with the same name exists, and it is only saved when saveapp is True
    and a name was given.
    """
    from balsam.core.models import ApplicationDefinition as App
    import shutil
    newapp = App()
    if checkexe:
        if shutil.which(executable):
            print('{} is found'.format(executable))
        else:
            print('{} is not found'.format(executable))
            return newapp
    if App.objects.filter(name=name).exists():
        print("An application named {} already exists".format(name))
    else:
        newapp.name = name
        newapp.executable = executable
        newapp.description = description
        newapp.envscript = envscript
        newapp.preprocess = preprocess
        newapp.postprocess = postprocess
        if saveapp:
            if not name:
                # The widget starts with an empty name box; never store an unnamed app.
                print('Cannot save an app without a name.')
            else:
                newapp.save()
                print(f'{newapp.name} added to the balsam database.')
    return newapp
interactive(children=(Text(value='', description='name'), Text(value='', description='executable'), Text(value…
In [16]:
Copied!
# Not ready: the dictionary-valued arguments (data, environ_vars) cannot be edited
# from the widgets yet, so they are passed through unchanged with fixed().
from balsam.core.models import ApplicationDefinition as App

# Names offered in the "application" dropdown.
appnames = [app.name for app in App.objects.all()]

# interact_manual only runs add_job when its button is clicked. Plain interact
# would call it -- and therefore save a job -- on every widget change.
@interact_manual(name='', workflow='', application=appnames, description='', args='',
                 num_nodes=range(1,4394), ranks_per_node=range(1,256),
                 cpu_affinity=['depth','none'], data=fixed({}), environ_vars=fixed({}))
def add_job(name, workflow, application, description='', args='', num_nodes=1, ranks_per_node=1,cpu_affinity='depth',data={},environ_vars={}):
    """
    Creates a BalsamJob from the given values and saves it to the database.

    Parameters
    ----------
    name: str, name of the job
    workflow: str, name of the workflow the job belongs to
    application: str, name of the app to run
    description: str, info about the job
    args: str, stored on the job's args field
    num_nodes: int, stored on the job's num_nodes field
    ranks_per_node: int, stored on the job's ranks_per_node field
    cpu_affinity: str, 'depth' or 'none'
    data: dict, stored on the job's data field
    environ_vars: stored on the job's environ_vars field as given
    """
    from balsam.launcher.dag import BalsamJob
    job = BalsamJob()
    job.name = name
    job.workflow = workflow
    job.application = application
    job.description = description
    job.args = args
    job.num_nodes = num_nodes
    job.ranks_per_node = ranks_per_node
    job.cpu_affinity = cpu_affinity
    # NOTE(review): passed through as given; the format BalsamJob expects for
    # environ_vars is not visible here -- confirm against the model.
    job.environ_vars = environ_vars
    # Copy so that the job never shares the default/fixed dictionary object.
    job.data = dict(data)
    job.save()
# Not ready: the dictionary-valued arguments (data, environ_vars) cannot be edited
# from the widgets yet, so they are passed through unchanged with fixed().
from balsam.core.models import ApplicationDefinition as App

# Names offered in the "application" dropdown.
appnames = [app.name for app in App.objects.all()]

# interact_manual only runs add_job when its button is clicked. Plain interact
# would call it -- and therefore save a job -- on every widget change.
@interact_manual(name='', workflow='', application=appnames, description='', args='',
                 num_nodes=range(1,4394), ranks_per_node=range(1,256),
                 cpu_affinity=['depth','none'], data=fixed({}), environ_vars=fixed({}))
def add_job(name, workflow, application, description='', args='', num_nodes=1, ranks_per_node=1,cpu_affinity='depth',data={},environ_vars={}):
    """
    Creates a BalsamJob from the given values and saves it to the database.

    Parameters
    ----------
    name: str, name of the job
    workflow: str, name of the workflow the job belongs to
    application: str, name of the app to run
    description: str, info about the job
    args: str, stored on the job's args field
    num_nodes: int, stored on the job's num_nodes field
    ranks_per_node: int, stored on the job's ranks_per_node field
    cpu_affinity: str, 'depth' or 'none'
    data: dict, stored on the job's data field
    environ_vars: stored on the job's environ_vars field as given
    """
    from balsam.launcher.dag import BalsamJob
    job = BalsamJob()
    job.name = name
    job.workflow = workflow
    job.application = application
    job.description = description
    job.args = args
    job.num_nodes = num_nodes
    job.ranks_per_node = ranks_per_node
    job.cpu_affinity = cpu_affinity
    # NOTE(review): passed through as given; the format BalsamJob expects for
    # environ_vars is not visible here -- confirm against the model.
    job.environ_vars = environ_vars
    # Copy so that the job never shares the default/fixed dictionary object.
    job.data = dict(data)
    job.save()
In [5]:
Copied!
@interact(job_id='',show_output=False)
def get_job_info(job_id='',show_output=False):
    """
    Prints verbose job info for a given job id.

    Parameters
    ----------
    job_id: str, Partial or full Balsam job id.
    show_output: boolean, True: also print the content of the job's output
        file (<working_directory>/<name>.out)
    """
    from balsam.launcher.dag import BalsamJob as Job
    jobs = Job.objects.filter(job_id__contains=job_id)
    num_matches = len(jobs)
    if num_matches == 1:
        thejob = jobs[0]
        print(thejob)
        if show_output:
            output = f'{thejob.working_directory}/{thejob.name}.out'
            # Report a missing or unreadable file as a message instead of
            # letting the error escape from the widget callback.
            try:
                with open(output) as f:
                    out = f.read()
            except OSError as err:
                print(f'Cannot read output file {output}: {err}')
            else:
                print(f'Output file {output} content:')
                print(out)
    elif num_matches == 0:
        print('No matching jobs')
    else:
        print(f'{num_matches} jobs matched, enter full id.')
@interact(job_id='',show_output=False)
def get_job_info(job_id='',show_output=False):
    """
    Prints verbose job info for a given job id.

    Parameters
    ----------
    job_id: str, Partial or full Balsam job id.
    show_output: boolean, True: also print the content of the job's output
        file (<working_directory>/<name>.out)
    """
    from balsam.launcher.dag import BalsamJob as Job
    jobs = Job.objects.filter(job_id__contains=job_id)
    num_matches = len(jobs)
    if num_matches == 1:
        thejob = jobs[0]
        print(thejob)
        if show_output:
            output = f'{thejob.working_directory}/{thejob.name}.out'
            # Report a missing or unreadable file as a message instead of
            # letting the error escape from the widget callback.
            try:
                with open(output) as f:
                    out = f.read()
            except OSError as err:
                print(f'Cannot read output file {output}: {err}')
            else:
                print(f'Output file {output} content:')
                print(out)
    elif num_matches == 0:
        print('No matching jobs')
    else:
        print(f'{num_matches} jobs matched, enter full id.')
interactive(children=(Text(value='', description='job_id'), Checkbox(value=False, description='show_output'), …
In [17]:
Copied!
from balsam.launcher.dag import BalsamJob as Job
from balsam.core.models import ApplicationDefinition as App

# Choices for the dropdowns below; 'ALL' means "do not filter on this field".
allstates = ['ALL',
             'CREATED',
             'AWAITING_PARENTS',
             'READY',
             'STAGED_IN',
             'PREPROCESSED',
             'RUNNING',
             'RUN_DONE',
             'POSTPROCESSED',
             'JOB_FINISHED',
             'RUN_TIMEOUT',
             'RUN_ERROR',
             'RESTART_READY',
             'FAILED',
             'USER_KILLED']
# order_by() clears any ordering so that distinct() is applied to the workflow column only.
allworkflows = [wf['workflow'] for wf in Job.objects.order_by().values('workflow').distinct()]
allworkflows.append('ALL')
allapps = [app.name for app in App.objects.all()]
allapps.append('ALL')

@interact(state=allstates,workflow=allworkflows,app=allapps,name='')
def list_jobs(state='ALL',workflow='ALL',app='ALL',name=''):
    """
    Prints a table of the jobs that match the selected filters.

    Parameters
    ----------
    state: str, job state to select, 'ALL' for any state
    workflow: str, workflow to select, 'ALL' for any workflow
    app: str, application name to select, 'ALL' for any application
    name: str, case-insensitive part of the job name, empty for any name
    """
    # count() asks the database for the number of rows instead of loading every job.
    print(f'Total number of jobs: {Job.objects.count()}')
    jobs = Job.objects.all()
    if state != 'ALL':
        jobs = jobs.filter(state=state)
    if workflow != 'ALL':
        jobs = jobs.filter(workflow=workflow)
    if app != 'ALL':
        jobs = jobs.filter(application=app)
    if name:
        jobs = jobs.filter(name__icontains=name)
    # len() evaluates the query once; the rows are then reused by the loop below.
    num_selected = len(jobs)
    print(f'Selected number of jobs: {num_selected}')
    if num_selected > 0:
        # Header line. The last column depends on the state filter: runtime for
        # finished jobs, the state itself when all states are listed.
        t = '{:<20}'.format('Name')
        t += ' {:>8}'.format('Nodes')
        t += ' {:>12}'.format('Ranks')
        t += ' {:^8}'.format('ID')
        if state == 'JOB_FINISHED':
            t += '{:>12}'.format('Runtime')
        elif state == 'ALL':
            t += '{:>15}'.format('State')
        print(t)
        for job in jobs:
            # Names are cut to 15 characters; only the first block of the job id is shown.
            s = '{:<20.15}'.format(job.name)
            s += ' {:>8}'.format(job.num_nodes)
            s += ' {:>12}'.format(job.num_ranks)
            s += ' {:>8}'.format(str(job.job_id).split('-')[0])
            if state == 'JOB_FINISHED':
                s += '{:>12.3f}'.format(job.runtime_seconds)
            elif state == 'ALL':
                s += '{:>15}'.format(job.state)
            print(s)
from balsam.launcher.dag import BalsamJob as Job
from balsam.core.models import ApplicationDefinition as App

# Choices for the dropdowns below; 'ALL' means "do not filter on this field".
allstates = ['ALL',
             'CREATED',
             'AWAITING_PARENTS',
             'READY',
             'STAGED_IN',
             'PREPROCESSED',
             'RUNNING',
             'RUN_DONE',
             'POSTPROCESSED',
             'JOB_FINISHED',
             'RUN_TIMEOUT',
             'RUN_ERROR',
             'RESTART_READY',
             'FAILED',
             'USER_KILLED']
# order_by() clears any ordering so that distinct() is applied to the workflow column only.
allworkflows = [wf['workflow'] for wf in Job.objects.order_by().values('workflow').distinct()]
allworkflows.append('ALL')
allapps = [app.name for app in App.objects.all()]
allapps.append('ALL')

@interact(state=allstates,workflow=allworkflows,app=allapps,name='')
def list_jobs(state='ALL',workflow='ALL',app='ALL',name=''):
    """
    Prints a table of the jobs that match the selected filters.

    Parameters
    ----------
    state: str, job state to select, 'ALL' for any state
    workflow: str, workflow to select, 'ALL' for any workflow
    app: str, application name to select, 'ALL' for any application
    name: str, case-insensitive part of the job name, empty for any name
    """
    # count() asks the database for the number of rows instead of loading every job.
    print(f'Total number of jobs: {Job.objects.count()}')
    jobs = Job.objects.all()
    if state != 'ALL':
        jobs = jobs.filter(state=state)
    if workflow != 'ALL':
        jobs = jobs.filter(workflow=workflow)
    if app != 'ALL':
        jobs = jobs.filter(application=app)
    if name:
        jobs = jobs.filter(name__icontains=name)
    # len() evaluates the query once; the rows are then reused by the loop below.
    num_selected = len(jobs)
    print(f'Selected number of jobs: {num_selected}')
    if num_selected > 0:
        # Header line. The last column depends on the state filter: runtime for
        # finished jobs, the state itself when all states are listed.
        t = '{:<20}'.format('Name')
        t += ' {:>8}'.format('Nodes')
        t += ' {:>12}'.format('Ranks')
        t += ' {:^8}'.format('ID')
        if state == 'JOB_FINISHED':
            t += '{:>12}'.format('Runtime')
        elif state == 'ALL':
            t += '{:>15}'.format('State')
        print(t)
        for job in jobs:
            # Names are cut to 15 characters; only the first block of the job id is shown.
            s = '{:<20.15}'.format(job.name)
            s += ' {:>8}'.format(job.num_nodes)
            s += ' {:>12}'.format(job.num_ranks)
            s += ' {:>8}'.format(str(job.job_id).split('-')[0])
            if state == 'JOB_FINISHED':
                s += '{:>12.3f}'.format(job.runtime_seconds)
            elif state == 'ALL':
                s += '{:>15}'.format(job.state)
            print(s)
interactive(children=(Dropdown(description='state', options=('ALL', 'CREATED', 'AWAITING_PARENTS', 'READY', 'S…
In [ ]:
Copied!