Hi,
see answers inline...
Am 25.06.2009 um 22:55 schrieb Damian Viano:
On Fri, Jun 19, 2009 at 05:50:16PM +0200, Alexander Malysh wrote:
Hi,
ok, here we go. This is relative large patch because too much
things must be
merged from private version into official.
Sorry to take this long to get back to you. I've tested it and the
results
looks promising, only some comments inlined below (I left only the
parts I
considered relevant for the comments).
Please test attached patch against CVS head or checkout/follow from/
on:
http://github.com/amalysh/kannel/tree/smpp-throttling
I have to say I backported it to stable and test it there only
since I need
this for production and I'm not inclined to put a full cvs version
just yet.
Also, I haven't tested it in production yet, only against a dumb
smsc based
on Net::SMPP.
This patch implements following:
- new main loop logic
- some inlines for speed
Nice catch :)
- more error handling
- async shutdown
I didn't test this.
diff --git a/gw/smsc/smsc_smpp.c b/gw/smsc/smsc_smpp.c
index cf61122..60cd2df 100644
--- a/gw/smsc/smsc_smpp.c
+++ b/gw/smsc/smsc_smpp.c
@@ -79,6 +79,7 @@
#include "dlr.h"
#include "bearerbox.h"
#include "meta_data.h"
+#include "load.h"
#define SMPP_DEFAULT_CHARSET "UTF-8"
@@ -111,7 +112,7 @@
#define SMPP_MAX_PENDING_SUBMITS 10
#define SMPP_DEFAULT_VERSION 0x34
#define SMPP_DEFAULT_PRIORITY 0
-#define SMPP_THROTTLING_SLEEP_TIME 15
+#define SMPP_THROTTLING_SLEEP_TIME 1
Why change this? I know it's no big deal, but I just don't get why
we don't
want to stop sending for a longer while if the smsc tell us to.
normally you will receive throttling error only for 1 sec timeslot.
Therefore it doesn't make sense to sleep
more as 1 second because next second you should be able to send again.
#define SMPP_DEFAULT_CONNECTION_TIMEOUT 10 *
SMPP_ENQUIRE_LINK_INTERVAL
#define SMPP_DEFAULT_WAITACK 60
#define SMPP_DEFAULT_SHUTDOWN_TIMEOUT 30
Shouldn't be SMPP_DEFAULT_SHUTDOWN_TIMEOUT usually >=
SMPP_DEFAULT_WAITACK so
we wait for the acks instead of shutting down and not getting them?
(I know
this is not exactly your patch, but...)
it could be but we don't want to wait forever for some buggy or
overloaded SMSCs. Again normally SMSC
will send all responses within 30sec.
@@ -912,15 +917,15 @@ static SMPP_PDU *msg_to_pdu(SMPP *smpp, Msg
*msg)
* Note: we always send in UTC and just define "Time Difference"
as 00 and
* direction '+'.
*/
- validity = msg->sms.validity >= 0 ? msg->sms.validity : smpp-
>validityperiod;
- if (validity >= 0) {
+ validity = msg->sms.validity != SMS_PARAM_UNDEFINED ? msg-
>sms.validity : smpp->validityperiod;
+ if (validity != SMS_PARAM_UNDEFINED) {
Could this still be SMS_PARAM_UNDEFINED after the line above? (not
sure if
msg->sms.validity or smpp->validityperiod can be SMS_PARAM_UNDEFINED)
I don't get your question... This line means: set validity from msg
struct or smsc config but validity from msg struct has priority.
Sure can validity still be undefined therefore we check it...
struct tm tm = gw_gmtime(time(NULL) + validity * 60);
pdu->u.submit_sm.validity_period = octstr_format("%02d%02d
%02d%02d%02d%02d000+",
tm.tm_year % 100, tm.tm_mon + 1, tm.tm_mday,
tm.tm_hour, tm.tm_min, tm.tm_sec);
}
- if (msg->sms.deferred >= 0) {
+ if (msg->sms.deferred != SMS_PARAM_UNDEFINED) {
struct tm tm = gw_gmtime(time(NULL) + msg->sms.deferred * 60);
pdu->u.submit_sm.schedule_delivery_time = octstr_format("%02d
%02d%02d%02d%02d%02d000+",
tm.tm_year % 100, tm.tm_mon + 1, tm.tm_mday,
@@ -1860,94 +1897,125 @@ static void io_thread(void *arg)
conn = open_transceiver(smpp);
else
conn = open_receiver(smpp);
-
- last_enquire_sent = last_cleanup = last_response =
date_universal_now();
+
pending_submits = -1;
len = 0;
- smpp->throttling_err_time = 0;
- for (;conn != NULL;) {
- timeout = last_enquire_sent + smpp->enquire_link_interval
- - date_universal_now();
-
- if (conn_wait(conn, timeout) == -1)
+ last_response = last_cleanup = last_enquire_sent =
time(NULL);
+ while(conn != NULL) {
+ ret = read_pdu(smpp, conn, &len, &pdu);
+ if (ret == -1) { /* connection broken */
+ error(0, "SMPP[%s]: I/O error or other error. Re-
connecting.",
+ octstr_get_cstr(smpp->conn->id));
break;
-
- /* unbind
- * Read so long as unbind_resp received or timeout
passed. Otherwise we have
- * double delivered messages.
- */
- if (smpp->quitting) {
- send_unbind(smpp, conn);
- last_response = time(NULL);
- while(conn_wait(conn, 1.00) != -1 &&
- difftime(time(NULL), last_response) <
SMPP_DEFAULT_SHUTDOWN_TIMEOUT &&
- smpp->conn->status != SMSCCONN_DISCONNECTED) {
- if (read_pdu(smpp, conn, &len, &pdu) == 1) {
- dump_pdu("Got PDU:", smpp->conn->id, pdu);
- handle_pdu(smpp, conn, pdu,
&pending_submits);
- smpp_pdu_destroy(pdu);
- }
+ } else if (ret == -2) {
+ /* wrong pdu length , send gnack */
+ len = 0;
+ if (send_gnack(smpp, conn, SMPP_ESME_RINVCMDLEN, 0)
== -1) {
+ error(0, "SMPP[%s]: I/O error or other error.
Re-connecting.",
+ octstr_get_cstr(smpp->conn->id));
+ break;
}
- debug("bb.sms.smpp", 0, "SMPP[%s]: %s: break and
shutting down",
- octstr_get_cstr(smpp->conn->id), __func__);
-
- break;
- }
-
- send_enquire_link(smpp, conn, &last_enquire_sent);
-
- while ((ret = read_pdu(smpp, conn, &len, &pdu)) == 1) {
- last_response = time(NULL);
+ } else if (ret == 1) { /* data available */
/* Deal with the PDU we just got */
dump_pdu("Got PDU:", smpp->conn->id, pdu);
- handle_pdu(smpp, conn, pdu, &pending_submits);
+ ret = handle_pdu(smpp, conn, pdu, &pending_submits);
smpp_pdu_destroy(pdu);
-
+ if (ret == -1) {
+ error(0, "SMPP[%s]: I/O error or other error.
Re-connecting.",
+ octstr_get_cstr(smpp->conn->id));
+ break;
+ }
+
+ /*
+ * check if we are still connected
+ * Note: Function handle_pdu will set status to
SMSCCONN_DISCONNECTED
+ * when unbind was received.
+ */
+ if (smpp->conn->status == SMSCCONN_DISCONNECTED)
+ break;
+
/*
- * check if we are still connected.
- * Note: smsc can send unbind request, so we must
check it.
+ * If we are not bounded then no PDU may coming
from SMSC.
+ * It's just a workaround for buggy SMSC's whoes
send enquire_link's
+ * although link is not bounded. Means: we doesn't
notice these and if link
+ * keep to be not bounden we are reconnect after
defined timeout elapsed.
*/
- if (smpp->conn->status != SMSCCONN_ACTIVE && smpp-
>conn->status != SMSCCONN_ACTIVE_RECV) {
- /* we are disconnected */
- ret = -1;
- break;
+ if (IS_ACTIVE) {
+ /*
+ * Store last response time.
+ */
+ time(&last_response);
}
-
- /* Make sure we send enquire_link even if we read a
lot */
- send_enquire_link(smpp, conn, &last_enquire_sent);
-
- /* Make sure we send even if we read a lot */
- if (transmitter && difftime(time(NULL), smpp-
>throttling_err_time) > SMPP_THROTTLING_SLEEP_TIME) {
- smpp->throttling_err_time = 0;
- send_messages(smpp, conn, &pending_submits);
+ } else { /* no data available */
+ /* check last enquire_resp, if difftime > as
idle_timeout
+ * mark connection as broken.
+ * We have some SMSC connections where connection
seems to be OK, but
+ * in reallity is broken, because no responses
received.
+ */
+ if (smpp->connection_timeout > 0 &&
+ difftime(time(NULL), last_response) > smpp-
>connection_timeout) {
+ /* connection seems to be broken */
+ warning(0, "Got no responses within %ld sec.,
reconnecting...",
+ (long) difftime(time(NULL),
last_response));
+ break;
}
+
+ time(&now);
+ timeout = last_enquire_sent + smpp-
>enquire_link_interval - now;
+ if (!IS_ACTIVE && timeout <= 0)
+ timeout = smpp->enquire_link_interval;
+ if (transmitter && gw_prioqueue_len(smpp-
>msgs_to_send) > 0 &&
+ smpp->throttling_err_time > 0 &&
pending_submits < smpp->max_pending_submits) {
+ time_t tr_timeout = smpp->throttling_err_time +
SMPP_THROTTLING_SLEEP_TIME - now;
+ timeout = timeout > tr_timeout ? tr_timeout :
timeout;
+ } else if (transmitter && gw_prioqueue_len(smpp-
>msgs_to_send) > 0 && smpp->conn->throughput > 0 &&
+ smpp->max_pending_submits >
pending_submits) {
+ double t = 1.0 / smpp->conn->throughput;
+ timeout = t < timeout ? t : timeout;
+ }
I've struggled with this logic for a while and even if I understand
what the
code does I can't seem to get a grasp of the semantic meaning or
algorithm
behind it. So definitely some comments would be greatly appreciated.
Sorry but I think it's just impossible to document... Just read code
and think about all possible cases. Some comments will give
you a hint why such decision was taken.
+ /* sleep a while */
+ if (timeout > 0 && conn_wait(conn, timeout) == -1)
+ break;
}
Thanks for your patch, and hope my comments are welcome.
Damián Viano(Des).