#!/usr/bin/env python # # Script to run first the analysis portion on a set of NMR strucutres # followed by XPLOR water refinement and analysis of resulting structures # # Usage: edit pathnames in refine.pars file # execute script 'nmr_waterrefine.py' # import string,os,sys,profile,cPickle,math,socket,time,shutil,fnmatch,glob,random,pdb_file import nmv_dct,nmv_dsc,nmv_xplor # MAIN SCRIPT # =============================== # SET THE GENERAL CONFIGURATION FILE AND INPUT PARAMETER FILE configfile = os.path.join(os.getcwd(),'nmr_refine.conf') parameterfile = os.path.join(os.getcwd(),'refine.pars') # READ THE GENERAL CONFIGURATION FILE print 'Reading CONFIGURATION file %s...\n'%configfile config = nmv_dct.read(configfile) # READ THE REFINEMENT PARAMETER FILE print 'Reading PARAMETER file %s...\n'%parameterfile refinepars = nmv_dct.read(parameterfile) # SET SOFTWARE AND ACCOMPANYING PARAMETER LOCATIONS xplor = config['XPLOR'] topparpath = config['XPLOR_TOPPAR'] protocolspath = config['XPLOR_SOLVENT'] # SET PATHNAMES path = refinepars['PROJECTDIR'] tmppath = refinepars['TMPDIR'] psfpath = refinepars['INPUTDIR'] tablepath = refinepars['INPUTDIR'] inputcoordinatespath = refinepars['INPUTDIR'] scriptspath = refinepars['SCRIPTSDIR'] # SET INITIAL PARAMETERS FOR THE XPLOR RUN ninputstructures = int(refinepars['NINPUTSTRUCTURES']) pdb_base = refinepars['PDB_BASE'] # SET FILES AND PARAMETERS FOR THE XPLOR RUNS THAT REMAIN UNCHANGED psf_file = refinepars['PSF_FILE'] noe_file = refinepars['NOE_FILE'] cdih_file = refinepars['CDIH_FILE'] coup_file = refinepars['COUP_FILE'] sani_file = refinepars['SANI_FILE'] vean_file = refinepars['VEAN_FILE'] noe_accept = float(refinepars['NOE_ACCEPT']) cdih_accept = float(refinepars['CDIH_ACCEPT']) coup_accept = float(refinepars['COUP_ACCEPT']) sani_accept = float(refinepars['SANI_ACCEPT']) vean_accept = float(refinepars['VEAN_ACCEPT']) noe_ave = refinepars['NOE_AVE'] heat_steps = int(refinepars['HEAT_STEPS']) hot_steps = int(refinepars['HOT_STEPS']) cool_steps = int(refinepars['COOL_STEPS']) seed = int(refinepars['SEED']) # SET BASE FILENAMES analyzed_pdb_base = 'analyzed' refined_pdb_base = 'refined' analyzed_subdirname = 'analyzed_input' refined_subdirname = 'refined_input' # SET SOFTWARE AND PARAMETERS FOR THE VALIDATION ROUTINES profit = config['PROFIT'] procheck_run = config['PROCHECK_RUN'] prochecknmr_run = config['PROCHECKNMR_RUN'] procheck_dir = config['PROCHECK_DIR'] whatif_run = config['WHATIF_RUN'] zonefile = refinepars['ZONEFILE'] energysort = refinepars['ENERGYSORT'] selection = refinepars['SELECTION'] # SET FURTHER OPTIONS run_cluster = refinepars['RUN_CLUSTER'] queu_cluster = refinepars['QUEU_CLUSTER'] ############################# # ANALYSIS INPUT STRUCTURES # ############################# # SET THE OUTPUTCOORDINATESPATH FOR THE ANALYSIS OF THE INPUT STRUCTURES outputcoordinatespath = os.path.join(path,analyzed_subdirname) # CHECK WHETHER ANALYSIS WAS DONE filename = os.path.join(outputcoordinatespath,'summary_%s.txt'%analyzed_pdb_base) if os.path.exists(filename): print 'Analysis of the input structures is finished' else: # SET TEMPORARY FILES checkscriptfilename = os.path.join(outputcoordinatespath,'checkscr.tmp') runpath = os.path.join(outputcoordinatespath,'jobs') procheckpath = os.path.join(outputcoordinatespath,'procheck') whatcheckpath = os.path.join(outputcoordinatespath,'whatcheck') # RE-CREATE ANALYSIS DIRECTORY/CREATE ANALYSIS DIRECTORY IF NOT PRESENT: # REMOVE ALL PREVIOUS TEMPORARY FILES FOR CLEAN START # CREATE RUNPATH if os.path.exists(outputcoordinatespath): print 'Cleaning directory %s'%outputcoordinatespath nmv_dsc.removedir(outputcoordinatespath) os.mkdir(outputcoordinatespath) os.mkdir(runpath) else: print 'Creating directory %s'%outputcoordinatespath os.mkdir(outputcoordinatespath) os.mkdir(runpath) # ANALYZE THE INPUT STRUCTURES IN THE XPLOR FORCEFIELD USED FOR THE WATERREFINEMENT # INCLUDES VIOLATION AND ENERGY ANALYSIS # NOTE THAT THE XPLOR PROTOCOLS DO NOT YET INCLUDE THE RDC OR COUP RESTRAINTS. # THE NMV_XPLOR MODULE CREATES THE SCRIPTS NEEDED TO CALCULATE STRUCTURES ON A CLUSTER nmv_xplor.analyze(xplor, topparpath, protocolspath, inputcoordinatespath, outputcoordinatespath, runpath, ninputstructures, pdb_base, analyzed_pdb_base, psfpath, psf_file, tablepath, noe_file=noe_file, noe_ave=noe_ave, noe_accept=noe_accept, cdih_file=cdih_file, cdih_accept=cdih_accept, coup_file=coup_file, coup_accept=coup_accept, sani_file=sani_file, sani_accept=sani_accept, vean_file=vean_file, vean_accept=vean_accept, run_cluster=run_cluster, queu_cluster=queu_cluster) # CHECK EVERY 10 SECS WHETHER ALL STRUCTURES ARE ANALYZED (NEEDED WHEN RUNNING ON A CLUSTER) # todo: check acceptance criteria and loop until we have enough accepted structures done=0 while not done: time.sleep(10) if len(glob.glob(os.path.join(outputcoordinatespath,analyzed_pdb_base+'*.pdb')))==ninputstructures: done=1 print 'Done with the analysis of the input coordinate and constraints' # RUN THE OTHER CHECKS # CREATE INPUT SCRIPTS FOR THE CHECKING ROUTINES print 'Starting protein structure checks of the analyzed input structures' checkscript=open(checkscriptfilename,'w') checkscript.write('#!/bin/tcsh -f\n') # SET ENVIRONMENT VARIABLES checkscript.write('setenv prodir %s\n'%procheck_dir) # WRITE A XPLOR FILE-LIST CONTAINING THE INFORMATION ABOUT THE ANALYZED INPUT STRUCTURES # FILES ARE SORTED ON RESTRAINT ENERGY (SET ENERGYSORT), THE FUNCTION USES THE PDB-HEADER INFORMATION print 'DEBUG: doing writing xplor file list' checkscript.write('%s -writexplorfilelist %s %s %s\n'%(os.path.join(scriptspath,'nmv_nmrcheck.py'),\ outputcoordinatespath,\ analyzed_pdb_base,\ energysort)) print 'DEBUG: done with writing xplor file list' # AUTOMATICALLY CHECK THE ANALYZED INPUT STRUCTURES WITH PROCHECK AND WHATIF # PROFIT IS USED FOR RMSD CALCULATIONS checkscript.write('%s -checkall %s %s %s %s %s %s %s %s %s %s\n'%(os.path.join(scriptspath,'nmv_nmrcheck.py'),\ profit,\ procheck_run,\ prochecknmr_run,\ procheck_dir,\ whatif_run,\ outputcoordinatespath,\ analyzed_pdb_base,\ zonefile,\ selection, tmppath)) # CREATE A SUMMARY FILE THAT CONTAINS ALL INFORMATION checkscript.write('%s -summary %s %s \n'%(os.path.join(scriptspath,'nmv_nmrcheck.py'),\ outputcoordinatespath,\ analyzed_pdb_base)) # CLOSE THE SCRIPTFILE checkscript.close() # SUBMIT THE SCRIPT EITHER TO THE CLUSTER OR ON A SINGLE PROCESSOR MACHINE os.system('/bin/chmod +x %s'%checkscriptfilename) #print 'Not doing the procheck validation analysis' print 'Doing the procheck validation analysis' if run_cluster=='y': os.system('%s %s &'%(queu_cluster,checkscriptfilename)) else: os.system('%s'%checkscriptfilename) # CHECK EVERY 10 SECS WHETHER THE CHECKING ROUTINES ARE FINISHED (NEEDED WHEN RUNNING ON A CLUSTER) done=0 while not done: time.sleep(10) if os.path.exists(os.path.join(outputcoordinatespath,'summary_%s.txt'%analyzed_pdb_base)): done=1 # COMPRESS JOBS, PROCHECK AND WHATCHECK SUBDIRECTORIES current_location=os.getcwd() os.chdir(outputcoordinatespath) #os.system('tar cfz jobs.tgz jobs') #os.system('tar cfz procheck.tgz procheck') #os.system('tar cfz whatcheck.tgz whatcheck') os.chdir(current_location) # REMOVE THE SCRIPTFILES AND PROCHECK AND WHATCHECK DIRECTORIES # IN THE FINISHED CYCLE #for file in glob.glob(os.path.join(outputcoordinatespath,'*.tmp*')): # os.remove(file) #nmv_dsc.removedir(runpath) #nmv_dsc.removedir(procheckpath) #nmv_dsc.removedir(whatcheckpath) print 'Analysis of the input structures is finished' # print 'DEBUG: doing premature exit for debugging' # sys.exit(1) ################################## # REFINEMENT OF INPUT STRUCTURES # ################################## # SET THE OUTPUTCOORDINATESPATH FOR THE ACTUAL REFINEMENT OF THE ANALYZED INPUT STRUCTURES inputcoordinatespath = os.path.join(path,analyzed_subdirname) outputcoordinatespath = os.path.join(path,refined_subdirname) # CHECK WHETHER ANALYSIS WAS DONE filename = os.path.join(outputcoordinatespath,'summary_%s.txt'%refined_pdb_base) if os.path.exists(filename): print 'Analysis of refined structures is finished' else: # SET TEMPORARY FILES checkscriptfilename = os.path.join(outputcoordinatespath,'checkscr.tmp') runpath = os.path.join(outputcoordinatespath,'jobs') procheckpath = os.path.join(outputcoordinatespath,'procheck') whatcheckpath = os.path.join(outputcoordinatespath,'whatcheck') # CREATE REFINED STRUCTURES DIRECTORY IF NOT PRESENT: if not os.path.exists(outputcoordinatespath): print 'Creating directory %s'%outputcoordinatespath os.mkdir(outputcoordinatespath) os.mkdir(runpath) # CHECK HOW MANY STRUCTURES HAVE ALREADY BEEN REFINED nrefinedstructures=len(glob.glob(os.path.join(outputcoordinatespath,refined_pdb_base)+'_*.pdb')) # SET NSTRUCTURES AND START COUNTER FOR NMV_XPLOR.WATERREFINE nstructures = ninputstructures-nrefinedstructures start_count = nrefinedstructures+1 if nrefinedstructures