Hello! We are having some difficulties when using the ns_http command with sites using 8-bit encoding.
The ns_http command does not convert the received data, so we must use the 'encoding convertfrom' command. Sometimes the converted strings become corrupted. For example, take a server with output encoding iso-8859-2: if the server passes 'äöüŁ', then after conversion we get 'äöüŁ' (correct); if the server passes 'ÄÖÜŁ', then after conversion we get 'ÄÖ#' (corrupted). See the attached ns_http.test1 for an example (test 1.2 fails). Such strings can be found in any 8-bit encoding (to see this, run the attached http_charsets.test with the 'pairsTest' constraint enabled). The source for the ns_http command (tclhttp.c) shows that the problem is the use of Tcl_NewStringObj on binary input data (8-bit chars). Two solutions come to mind: 1) using Tcl_NewByteArrayObj instead of Tcl_NewStringObj; 2) using Tcl_ExternalToUtf before calling Tcl_NewStringObj, i.e. a built-in 'encoding convertfrom'. The attached tclhttp.c.binary-externaltoutf.patch modifies the ns_http command: 1) the -binary switch is added to the queue/wait/run sub-commands to force the use of Tcl_NewByteArrayObj on text pages; 2) without -binary, the text page is converted according to the Content-Type header. Note that the second change requires TCLHTTP_USE_EXTERNALTOUTF to be defined at compile time. The fixed ns_http command can be tested with the attached ns_http.test2 (see tests 1.2.1 and 1.2.2). More intensive testing of the changes can be done with http_charsets.test (note the commented-out pairsTest constraint). I also replaced 'nstest::http-0.9 -encoding xxx' with 'ns_http run' in the existing encoding.test (see encoding_ns_http.test). All data transformations are performed successfully without explicit decoding. Automatic data decoding is convenient to use, but it changes the behavior of ns_http on 8-bit inputs. These changes could break existing code if someone uses ns_http to interact with 8-bit sites (with a risk of data corruption). To use the patched version of ns_http, either remove the 'encoding convertfrom' call or add the -binary switch.
It should be noted that the -binary switch followed by 'encoding convertfrom' will also be useful for 8-bit sites with a missing or incorrect Content-Type. Regards, Oleg Oleinick. PS. Attached files: ns_http.test1 - tests for the current version, shows the corruption of 8-bit text; ns_http.test2 - tests for the patched version, shows the correct receipt of 8-bit text; tclhttp.c.binary-externaltoutf.patch - patch for the ns_http command, adds the -binary switch and automatic decoding of text data; http_charsets.test - tests for ns_http, suitable for both the current and the patched version; encoding_ns_http.test - like the existing encoding.test, with 'nstest::http-0.9 -encoding xxx' replaced by the new 'ns_http run'.
ns_http.test1
Description: Binary data
ns_http.test2
Description: Binary data
diff --git a/nsd/tclhttp.c b/nsd/tclhttp.c
index cef2ee7d..2693fcfe 100644
--- a/nsd/tclhttp.c
+++ b/nsd/tclhttp.c
@@ -40,6 +40,12 @@
#include <openssl/err.h>
#endif
+/*
+ * temporary solution for ns_http wait/queue/run -binary
+ * FIXME: move this definition to nsd.h
+ */
+#define NS_HTTP_FLAG_BINARY (1<<4)
+
/*
* The maximum number of bytes we can send to TLS
* in one operation is 2^14 => 16384 (see RFC 5246).
@@ -658,7 +664,7 @@ HttpWaitObjCmd(
NsHttpTask *httpPtr = NULL;
char *id = NULL, *outputFileName = NULL;
- int result = TCL_OK, decompress = 0;
+ int result = TCL_OK, decompress = 0, binary = 0;
Tcl_WideInt spoolLimit = -1;
Tcl_Obj *elapsedVarObj = NULL,
*resultVarObj = NULL,
@@ -677,6 +683,7 @@ HttpWaitObjCmd(
{"-outputfile", Ns_ObjvString, &outputFileName, NULL},
{"-spoolsize", Ns_ObjvMemUnit, &spoolLimit, NULL},
{"-decompress", Ns_ObjvBool, &decompress, INT2PTR(NS_TRUE)},
+ {"-binary", Ns_ObjvBool, &binary, INT2PTR(NS_TRUE)},
{NULL, NULL, NULL, NULL}
};
Ns_ObjvSpec args[] = {
@@ -707,6 +714,10 @@ HttpWaitObjCmd(
Ns_Log(Warning, "ns_http_wait: -decompress option is deprecated");
httpPtr->flags |= NS_HTTP_FLAG_DECOMPRESS;
}
+ if (binary != 0) {
+ Ns_Log(Warning, "ns_http_wait: -binary option is deprecated");
+ httpPtr->flags |= NS_HTTP_FLAG_BINARY;
+ }
if (spoolLimit > -1) {
Ns_Log(Warning, "ns_http_wait: -spoolsize option is deprecated");
httpPtr->spoolLimit = spoolLimit;
@@ -1215,7 +1226,7 @@ HttpQueue(
bool run
) {
Tcl_Interp *interp;
- int result = TCL_OK, decompress = 0;
+ int result = TCL_OK, decompress = 0, binary = 0;
Tcl_WideInt spoolLimit = -1;
int verifyCert = 0, keepHostHdr = 0;
NsHttpTask *httpPtr = NULL;
@@ -1239,6 +1250,7 @@ HttpQueue(
Ns_ObjvValueRange sizeRange = {0, LLONG_MAX};
Ns_ObjvSpec opts[] = {
+ {"-binary", Ns_ObjvBool, &binary, INT2PTR(NS_TRUE)},
{"-body", Ns_ObjvObj, &bodyObj, NULL},
{"-body_size", Ns_ObjvWideInt, &bodySize, &sizeRange},
{"-body_file", Ns_ObjvString, &bodyFileName, NULL},
@@ -1358,7 +1370,6 @@ HttpQueue(
HttpSpliceChannels(interp, httpPtr);
HttpClose(httpPtr);
}
-
} else {
/*
@@ -1376,6 +1387,9 @@ HttpQueue(
if (decompress != 0) {
httpPtr->flags |= NS_HTTP_FLAG_DECOMPRESS;
}
+ if (binary != 0) {
+ httpPtr->flags |= NS_HTTP_FLAG_BINARY;
+ }
httpPtr->servPtr = itPtr->servPtr;
httpPtr->task = Ns_TaskTimedCreate(httpPtr->sock, HttpProc, httpPtr, expirePtr);
@@ -1483,7 +1497,6 @@ HttpGetResult(
NS_NONNULL_ASSERT(interp != NULL);
NS_NONNULL_ASSERT(httpPtr != NULL);
-
if (httpPtr->error != NULL) {
if (httpPtr->finalSockState == NS_SOCK_TIMEOUT) {
Tcl_SetErrorCode(interp, errorCodeTimeoutString, (char *)0L);
@@ -1495,6 +1508,9 @@ HttpGetResult(
}
if (httpPtr->recvSpoolMode == NS_FALSE) {
+#if defined(TCLHTTP_USE_EXTERNALTOUTF)
+ Tcl_Encoding encoding = NULL;
+#endif
bool binary = NS_FALSE;
int cSize;
char *cData;
@@ -1517,6 +1533,9 @@ HttpGetResult(
binary = NS_TRUE;
}
}
+ if ((httpPtr->flags & NS_HTTP_FLAG_BINARY) != 0u) {
+ binary = NS_TRUE;
+ }
if (binary == NS_FALSE) {
char *cType = NULL;
@@ -1529,6 +1548,14 @@ HttpGetResult(
* completely regular text formats!
*/
binary = Ns_IsBinaryMimeType(cType);
+#if defined(TCLHTTP_USE_EXTERNALTOUTF)
+ if (binary == NS_FALSE) {
+ encoding = Ns_GetTypeEncoding(cType);
+ if (encoding == NULL) {
+ encoding = NS_utf8Encoding;
+ }
+ }
+#endif
}
}
@@ -1538,7 +1565,15 @@ HttpGetResult(
if (binary == NS_TRUE) {
replyBodyObj = Tcl_NewByteArrayObj((unsigned char *)cData, cSize);
} else {
+#if defined(TCLHTTP_USE_EXTERNALTOUTF)
+ Tcl_DString ds;
+ Tcl_DStringInit(&ds);
+ Tcl_ExternalToUtfDString(encoding, cData, cSize, &ds);
+ replyBodyObj = Tcl_NewStringObj(Tcl_DStringValue(&ds), -1);
+ Tcl_DStringFree(&ds);
+#else
replyBodyObj = Tcl_NewStringObj(cData, cSize);
+#endif
}
}
http_charsets.test
Description: Binary data
encoding_ns_http.test
Description: Binary data
_______________________________________________ naviserver-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/naviserver-devel
