It looks similar, but one big difference is that when I run "qconf -sh" I see all my compute nodes listed along with my frontend. However, "qconf -sql" is empty.
Thanks On 05/25/2016 11:12 AM, MacMullan IV, Hugh wrote:
I'm no Rocks guy, but maybe this thread will help: https://lists.sdsc.edu/pipermail/npaci-rocks-discussion/2013-January/060918.html -----Original Message----- From: users-boun...@gridengine.org [mailto:users-boun...@gridengine.org] On Behalf Of Pat Haley Sent: Wednesday, May 25, 2016 10:59 AM To: users@gridengine.org Subject: [gridengine users] All queues dropped because of overload or full Hi All, We've just upgraded our cluster from Rocks 6.0 to Rocks 6.2. One issue that has come up is that SGE is not scheduling any jobs; they sit in the queue in a wait state. Checking with "qstat -j" gives the following scheduling info: All queues dropped because of overload or full We have also noticed that there are no SGE daemons running on any of the execution nodes (I don't know if that is normal or not). We have also collected the information below from qconf. Any help in resolving this would be greatly appreciated. Thanks ------------------------------------------------- [root@mseas ~]# qconf -sql no cqueue list defined ------------------------------------------------- [root@mseas ~]# qconf -tsm root@mseas.local triggers scheduler monitoring [root@mseas common]# more schedd_runlog Wed May 25 09:22:56 2016|-------------START-SCHEDULER-RUN------------- Wed May 25 09:22:56 2016|All queues dropped because of overload or full Wed May 25 09:22:56 2016|--------------STOP-SCHEDULER-RUN------------- ------------------------------------------------- [root@mseas ~]# qconf -sconf #global: execd_spool_dir /opt/gridengine/default/spool mailer /bin/mail xterm /usr/bin/X11/xterm load_sensor none prolog none epilog none shell_start_mode posix_compliant login_shells sh,bash,ksh,csh,tcsh min_uid 0 min_gid 0 user_lists none xuser_lists none projects none xprojects none enforce_project false enforce_user auto load_report_time 00:00:40 max_unheard 00:05:00 reschedule_unknown 02:00:00 loglevel log_warning administrator_mail none set_token_cmd none pag_cmd none token_extend_time none 
shepherd_cmd none qmaster_params none execd_params none reporting_params accounting=true reporting=true \ flush_time=00:00:15 joblog=true sharelog=00:00:00 finished_jobs 100 gid_range 20000-20100 qlogin_command builtin qlogin_daemon builtin rlogin_command builtin rlogin_daemon builtin rsh_command builtin rsh_daemon builtin max_aj_instances 2000 max_aj_tasks 75000 max_u_jobs 0 max_jobs 0 max_advance_reservations 0 auto_user_oticket 0 auto_user_fshare 0 auto_user_default_project none auto_user_delete_time 86400 delegated_file_staging false reprioritize 0 jsv_url none jsv_allowed_mod ac,h,i,e,o,j,M,N,p,w ------------------------------------------------- [root@mseas ~]# qconf -se compute-0-0 hostname compute-0-0.local load_scaling NONE complex_values NONE load_values arch=linux-x64,num_proc=4,mem_total=63802.578125M, \ swap_total=996.210938M,virtual_total=64798.789062M, \ load_avg=0.060000,load_short=0.040000, \ load_medium=0.060000,load_long=0.000000, \ mem_free=63263.187500M,swap_free=996.210938M, \ virtual_free=64259.398438M,mem_used=539.390625M, \ swap_used=0.000000M,virtual_used=539.390625M, \ cpu=0.000000,m_topology=SCCSCC,m_topology_inuse=SCCSCC, \ m_socket=2,m_core=4,np_load_avg=0.015000, \ np_load_short=0.010000,np_load_medium=0.015000, \ np_load_long=0.000000 processors 4 user_lists NONE xuser_lists NONE projects NONE xprojects NONE usage_scaling NONE report_variables NONE ------------------------------------------------- [root@mseas ~]# qconf -ssconf algorithm default schedule_interval 0:0:15 maxujobs 0 queue_sort_method load job_load_adjustments np_load_avg=0.50 load_adjustment_decay_time 0:7:30 load_formula np_load_avg schedd_job_info true flush_submit_sec 0 flush_finish_sec 0 params none reprioritize_interval 0:0:0 halftime 168 usage_weight_list cpu=1.000000,mem=0.000000,io=0.000000 compensation_factor 5.000000 weight_user 0.250000 weight_project 0.250000 weight_department 0.250000 weight_job 0.250000 weight_tickets_functional 0 weight_tickets_share 0 
share_override_tickets TRUE share_functional_shares TRUE max_functional_jobs_to_schedule 200 report_pjob_tickets TRUE max_pending_tasks_per_job 50 halflife_decay_list none policy_hierarchy OFS weight_ticket 0.500000 weight_waiting_time 0.278000 weight_deadline 3600000.000000 weight_urgency 0.500000 weight_priority 0.000000 max_reservation 0 default_duration INFINITY -------------------------------------------------
-- -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- Pat Haley Email: pha...@mit.edu Center for Ocean Engineering Phone: (617) 253-6824 Dept. of Mechanical Engineering Fax: (617) 253-8125 MIT, Room 5-213 http://web.mit.edu/phaley/www/ 77 Massachusetts Avenue Cambridge, MA 02139-4301 _______________________________________________ users mailing list users@gridengine.org https://gridengine.org/mailman/listinfo/users