|
@@ -7,7 +7,6 @@ import com.xxl.job.admin.core.model.XxlJobLog;
|
7
|
7
|
import com.xxl.job.admin.core.trigger.TriggerTypeEnum;
|
8
|
8
|
import com.xxl.job.admin.core.util.I18nUtil;
|
9
|
9
|
import com.xxl.job.core.biz.model.ReturnT;
|
10
|
|
-import com.xxl.job.core.handler.IJobHandler;
|
11
|
10
|
import org.apache.commons.collections4.CollectionUtils;
|
12
|
11
|
import org.slf4j.Logger;
|
13
|
12
|
import org.slf4j.LoggerFactory;
|
|
@@ -17,12 +16,15 @@ import javax.mail.MessagingException;
|
17
|
16
|
import javax.mail.internet.MimeMessage;
|
18
|
17
|
import java.io.UnsupportedEncodingException;
|
19
|
18
|
import java.text.MessageFormat;
|
20
|
|
-import java.util.*;
|
21
|
|
-import java.util.concurrent.LinkedBlockingQueue;
|
|
19
|
+import java.util.Arrays;
|
|
20
|
+import java.util.HashSet;
|
|
21
|
+import java.util.List;
|
|
22
|
+import java.util.Set;
|
22
|
23
|
import java.util.concurrent.TimeUnit;
|
23
|
24
|
|
24
|
25
|
/**
|
25
|
26
|
* job monitor instance
|
|
27
|
+ *
|
26
|
28
|
* @author xuxueli 2015-9-1 18:05:56
|
27
|
29
|
*/
|
28
|
30
|
public class JobFailMonitorHelper {
|
|
@@ -35,8 +37,6 @@ public class JobFailMonitorHelper {
|
35
|
37
|
|
36
|
38
|
// ---------------------- monitor ----------------------
|
37
|
39
|
|
38
|
|
- private LinkedBlockingQueue<Integer> queue = new LinkedBlockingQueue<Integer>(0xfff8);
|
39
|
|
-
|
40
|
40
|
private Thread monitorThread;
|
41
|
41
|
private volatile boolean toStop = false;
|
42
|
42
|
public void start(){
|
|
@@ -44,52 +44,47 @@ public class JobFailMonitorHelper {
|
44
|
44
|
|
45
|
45
|
@Override
|
46
|
46
|
public void run() {
|
|
47
|
+
|
47
|
48
|
// monitor
|
48
|
49
|
while (!toStop) {
|
49
|
50
|
try {
|
50
|
|
- List<Integer> jobLogIdList = new ArrayList<Integer>();
|
51
|
|
- int drainToNum = JobFailMonitorHelper.instance.queue.drainTo(jobLogIdList);
|
52
|
51
|
|
53
|
|
- if (CollectionUtils.isNotEmpty(jobLogIdList)) {
|
54
|
|
- for (Integer jobLogId : jobLogIdList) {
|
55
|
|
- if (jobLogId==null || jobLogId==0) {
|
|
52
|
+ List<Integer> failLogIds = XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().findFailJobLogIds(1000);
|
|
53
|
+ if (CollectionUtils.isNotEmpty(failLogIds)) {
|
|
54
|
+ for (int failLogId: failLogIds) {
|
|
55
|
+
|
|
56
|
+ // lock log
|
|
57
|
+ int lockRet = XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().updateAlarmStatus(failLogId, 0, -1);
|
|
58
|
+ if (lockRet < 1) {
|
56
|
59
|
continue;
|
57
|
60
|
}
|
58
|
|
- XxlJobLog log = XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().load(jobLogId);
|
59
|
|
- if (log == null) {
|
60
|
|
- continue;
|
|
61
|
+ XxlJobLog log = XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().load(failLogId);
|
|
62
|
+ XxlJobInfo info = XxlJobAdminConfig.getAdminConfig().getXxlJobInfoDao().loadById(log.getJobId());
|
|
63
|
+
|
|
64
|
+ // 1、fail retry monitor
|
|
65
|
+ if (log.getExecutorFailRetryCount() > 0) {
|
|
66
|
+ JobTriggerPoolHelper.trigger(log.getJobId(), TriggerTypeEnum.RETRY, (log.getExecutorFailRetryCount()-1), log.getExecutorShardingParam(), null);
|
|
67
|
+ String retryMsg = "<br><br><span style=\"color:#F39C12;\" > >>>>>>>>>>>"+ I18nUtil.getString("jobconf_trigger_type_retry") +"<<<<<<<<<<< </span><br>";
|
|
68
|
+ log.setTriggerMsg(log.getTriggerMsg() + retryMsg);
|
|
69
|
+ XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().updateTriggerInfo(log);
|
61
|
70
|
}
|
62
|
|
- if (IJobHandler.SUCCESS.getCode() == log.getTriggerCode() && log.getHandleCode() == 0) {
|
63
|
|
- // job running
|
64
|
|
- JobFailMonitorHelper.monitor(jobLogId);
|
65
|
|
- logger.debug(">>>>>>>>>>> job monitor, job running, JobLogId:{}", jobLogId);
|
66
|
|
- } else if (IJobHandler.SUCCESS.getCode() == log.getHandleCode()) {
|
67
|
|
- // job success, pass
|
68
|
|
- logger.info(">>>>>>>>>>> job monitor, job success, JobLogId:{}", jobLogId);
|
69
|
|
- } else /*if (IJobHandler.FAIL.getCode() == log.getTriggerCode()
|
70
|
|
- || IJobHandler.FAIL.getCode() == log.getHandleCode()
|
71
|
|
- || IJobHandler.FAIL_RETRY.getCode() == log.getHandleCode() )*/ {
|
72
|
|
-
|
73
|
|
- // job fail,
|
74
|
|
-
|
75
|
|
- // 1、fail retry
|
76
|
|
- XxlJobInfo info = XxlJobAdminConfig.getAdminConfig().getXxlJobInfoDao().loadById(log.getJobId());
|
77
|
|
-
|
78
|
|
- if (log.getExecutorFailRetryCount() > 0) {
|
79
|
|
- JobTriggerPoolHelper.trigger(log.getJobId(), TriggerTypeEnum.RETRY, (log.getExecutorFailRetryCount()-1), log.getExecutorShardingParam(), null);
|
80
|
|
- String retryMsg = "<br><br><span style=\"color:#F39C12;\" > >>>>>>>>>>>"+ I18nUtil.getString("jobconf_trigger_type_retry") +"<<<<<<<<<<< </span><br>";
|
81
|
|
- log.setTriggerMsg(log.getTriggerMsg() + retryMsg);
|
82
|
|
- XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().updateTriggerInfo(log);
|
83
|
|
- }
|
84
|
71
|
|
85
|
|
- // 2、fail alarm
|
86
|
|
- failAlarm(info, log);
|
|
72
|
+ // 2、fail alarm monitor
|
|
73
|
+ int newAlarmStatus = 0; // alarm status: 0 = default, -1 = locked, 1 = no alarm needed, 2 = alarm succeeded, 3 = alarm failed
|
|
74
|
+ if (info!=null && info.getAlarmEmail()!=null && info.getAlarmEmail().trim().length()>0) {
|
|
75
|
+ boolean alarmResult = true;
|
|
76
|
+ try {
|
|
77
|
+ alarmResult = failAlarm(info, log);
|
|
78
|
+ } catch (Exception e) {
|
|
79
|
+ alarmResult = false;
|
|
80
|
+ logger.error(e.getMessage(), e);
|
|
81
|
+ }
|
|
82
|
+ newAlarmStatus = alarmResult?2:3;
|
|
83
|
+ } else {
|
|
84
|
+ newAlarmStatus = 1;
|
|
85
|
+ }
|
87
|
86
|
|
88
|
|
- logger.info(">>>>>>>>>>> job monitor, job fail, JobLogId:{}", jobLogId);
|
89
|
|
- }/* else {
|
90
|
|
- JobFailMonitorHelper.monitor(jobLogId);
|
91
|
|
- logger.info(">>>>>>>>>>> job monitor, job status unknown, JobLogId:{}", jobLogId);
|
92
|
|
- }*/
|
|
87
|
+ XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().updateAlarmStatus(failLogId, -1, newAlarmStatus);
|
93
|
88
|
}
|
94
|
89
|
}
|
95
|
90
|
|
|
@@ -99,22 +94,6 @@ public class JobFailMonitorHelper {
|
99
|
94
|
}
|
100
|
95
|
}
|
101
|
96
|
|
102
|
|
- // monitor all clear
|
103
|
|
- List<Integer> jobLogIdList = new ArrayList<Integer>();
|
104
|
|
- int drainToNum = getInstance().queue.drainTo(jobLogIdList);
|
105
|
|
- if (jobLogIdList!=null && jobLogIdList.size()>0) {
|
106
|
|
- for (Integer jobLogId: jobLogIdList) {
|
107
|
|
- XxlJobLog log = XxlJobAdminConfig.getAdminConfig().getXxlJobLogDao().load(jobLogId);
|
108
|
|
- if (ReturnT.FAIL_CODE == log.getTriggerCode()|| ReturnT.FAIL_CODE==log.getHandleCode()) {
|
109
|
|
- // job fail,
|
110
|
|
- XxlJobInfo info = XxlJobAdminConfig.getAdminConfig().getXxlJobInfoDao().loadById(log.getJobId());
|
111
|
|
-
|
112
|
|
- failAlarm(info, log);
|
113
|
|
- logger.info(">>>>>>>>>>> job monitor last, job fail, JobLogId:{}", jobLogId);
|
114
|
|
- }
|
115
|
|
- }
|
116
|
|
- }
|
117
|
|
-
|
118
|
97
|
}
|
119
|
98
|
});
|
120
|
99
|
monitorThread.setDaemon(true);
|
|
@@ -131,11 +110,6 @@ public class JobFailMonitorHelper {
|
131
|
110
|
logger.error(e.getMessage(), e);
|
132
|
111
|
}
|
133
|
112
|
}
|
134
|
|
-
|
135
|
|
- // producer
|
136
|
|
- public static void monitor(int jobLogId){
|
137
|
|
- getInstance().queue.offer(jobLogId);
|
138
|
|
- }
|
139
|
113
|
|
140
|
114
|
|
141
|
115
|
// ---------------------- alarm ----------------------
|
|
@@ -168,7 +142,8 @@ public class JobFailMonitorHelper {
|
168
|
142
|
*
|
169
|
143
|
* @param jobLog
|
170
|
144
|
*/
|
171
|
|
- private void failAlarm(XxlJobInfo info, XxlJobLog jobLog){
|
|
145
|
+ private boolean failAlarm(XxlJobInfo info, XxlJobLog jobLog){
|
|
146
|
+ boolean alarmResult = true;
|
172
|
147
|
|
173
|
148
|
// send monitor email
|
174
|
149
|
if (info!=null && info.getAlarmEmail()!=null && info.getAlarmEmail().trim().length()>0) {
|
|
@@ -205,8 +180,10 @@ public class JobFailMonitorHelper {
|
205
|
180
|
helper.setText(content, true);
|
206
|
181
|
|
207
|
182
|
XxlJobAdminConfig.getAdminConfig().getMailSender().send(mimeMessage);
|
208
|
|
- } catch (UnsupportedEncodingException | MessagingException e) {
|
|
183
|
+ } catch (Exception e) {
|
209
|
184
|
logger.error(">>>>>>>>>>> job monitor alarm email send error, JobLogId:{}", jobLog.getId(), e);
|
|
185
|
+
|
|
186
|
+ alarmResult = false;
|
210
|
187
|
}
|
211
|
188
|
|
212
|
189
|
}
|
|
@@ -214,6 +191,8 @@ public class JobFailMonitorHelper {
|
214
|
191
|
|
215
|
192
|
// TODO, custom alarm strategy, such as sms
|
216
|
193
|
|
|
194
|
+
|
|
195
|
+ return alarmResult;
|
217
|
196
|
}
|
218
|
197
|
|
219
|
198
|
}
|