Package: samtools
Version: 0.1.19-1
Tags: sid patch
Severity: important
User: debian-mips-dev-disc...@lists.alioth.debian.org
Usertags: mips-patch

'python-pysam' package FTBFS on mips/mipsel due to issues in 'samtools' package.

Build log for 'python-pysam' on mips:
https://buildd.debian.org/status/fetch.php?pkg=python-pysam&arch=mips&ver=0.7.7-1&stamp=1397894446
Build log for 'python-pysam' on mipsel:
https://buildd.debian.org/status/fetch.php?pkg=python-pysam&arch=mipsel&ver=0.7.7-1&stamp=1397887877

Output of samtools executable from samtools package is used as a reference
during tests in python-pysam (its output is compared with output from
pysam python module), but samtools has both unaligned memory access
and system endianness issues.

I have attached patches that fix this. With these patches applied to samtools,
python-pysam builds fine.

Best Regards
Aleksandar Zlicic
Index: samtools-0.1.19/bam_import.c
===================================================================
--- samtools-0.1.19.orig/bam_import.c
+++ samtools-0.1.19/bam_import.c
@@ -21,6 +21,10 @@ void bam_init_header_hash(bam_header_t *
 void bam_destroy_header_hash(bam_header_t *header);
 int32_t bam_get_tid(const bam_header_t *header, const char *seq_name);
 
+#if defined(__i386__) || defined(__i386) || defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(__i686__) || defined(__i686)
+# define ALLOW_UAC
+#endif
+
 unsigned char bam_nt16_table[256] = {
 	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
 	15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
@@ -365,6 +369,10 @@ int sam_read1(tamFile fp, bam_header_t *
 	if (dret != '\n' && dret != '\r') { // aux
 		while (ks_getuntil(ks, KS_SEP_TAB, str, &dret) >= 0) {
 			uint8_t *s, type, key[2];
+#ifndef ALLOW_UAC
+			uint8_t tmpData[8];
+			int j;
+#endif
 			z += str->l + 1;
 			if (str->l < 6 || str->s[2] != ':' || str->s[4] != ':')
 				parse_error(fp->n_lines, "missing colon in auxiliary data");
@@ -410,14 +418,30 @@ int sam_read1(tamFile fp, bam_header_t *
 					}
 				}
 			} else if (type == 'f') {
+#ifndef ALLOW_UAC
+				float *ptmpData = (float*)&tmpData;
+#endif
 				s = alloc_data(b, doff + 5) + doff;
 				*s++ = 'f';
+#ifdef ALLOW_UAC
 				*(float*)s = (float)atof(str->s + 5);
+#else
+				*ptmpData = (float)atof(str->s + 5);
+				for(j=0;j<4;j++) s[j] = tmpData[j];
+#endif
 				s += 4; doff += 5;
 			} else if (type == 'd') {
+#ifndef ALLOW_UAC
+				float *ptmpData = (float*)&tmpData;
+#endif
 				s = alloc_data(b, doff + 9) + doff;
 				*s++ = 'd';
+#ifdef ALLOW_UAC
 				*(float*)s = (float)atof(str->s + 9);
+#else
+				*ptmpData = (float)atof(str->s + 9);
+				for(j=0;j<4;j++) s[j] = tmpData[j];
+#endif
 				s += 8; doff += 9;
 			} else if (type == 'Z' || type == 'H') {
 				int size = 1 + (str->l - 5) + 1;
Index: samtools-0.1.19/bam.c
===================================================================
--- samtools-0.1.19.orig/bam.c
+++ samtools-0.1.19/bam.c
@@ -157,7 +157,7 @@ int bam_header_write(bamFile fp, const b
 	return 0;
 }
 
-static void swap_endian_data(const bam1_core_t *c, int data_len, uint8_t *data)
+static void swap_endian_data(const bam1_core_t *c, int data_len, uint8_t *data, int is_host)
 {
 	uint8_t *s;
 	uint32_t i, *cigar = (uint32_t*)(data + c->l_qname);
@@ -174,16 +174,18 @@ static void swap_endian_data(const bam1_
 		else if (type == 'Z' || type == 'H') { while (*s) ++s; ++s; }
 		else if (type == 'B') {
 			int32_t n, Bsize = bam_aux_type2size(*s);
+			if(!is_host) bam_swap_endian_4p(s+1);
 			memcpy(&n, s + 1, 4);
 			if (1 == Bsize) {
 			} else if (2 == Bsize) {
-				for (i = 0; i < n; i += 2)
+				for (i = 0; i < n*Bsize; i += 2)
 					bam_swap_endian_2p(s + 5 + i);
 			} else if (4 == Bsize) {
-				for (i = 0; i < n; i += 4)
+				for (i = 0; i < n*Bsize; i += 4)
 					bam_swap_endian_4p(s + 5 + i);
 			}
-			bam_swap_endian_4p(s+1); 
+			if(is_host) bam_swap_endian_4p(s+1); 
+			s += n * Bsize + 4;
 		}
 	}
 }
@@ -217,7 +219,7 @@ int bam_read1(bamFile fp, bam1_t *b)
 	}
 	if (bam_read(fp, b->data, b->data_len) != b->data_len) return -4;
 	b->l_aux = b->data_len - c->n_cigar * 4 - c->l_qname - c->l_qseq - (c->l_qseq+1)/2;
-	if (bam_is_be) swap_endian_data(c, b->data_len, b->data);
+	if (bam_is_be) swap_endian_data(c, b->data_len, b->data, 0);
 	if (bam_no_B) bam_remove_B(b);
 	return 4 + block_len;
 }
@@ -240,11 +242,11 @@ inline int bam_write1_core(bamFile fp, c
 		for (i = 0; i < 8; ++i) bam_swap_endian_4p(x + i);
 		y = block_len;
 		bam_write(fp, bam_swap_endian_4p(&y), 4);
-		swap_endian_data(c, data_len, data);
+		swap_endian_data(c, data_len, data, 1);
 	} else bam_write(fp, &block_len, 4);
 	bam_write(fp, x, BAM_CORE_SIZE);
 	bam_write(fp, data, data_len);
-	if (bam_is_be) swap_endian_data(c, data_len, data);
+	if (bam_is_be) swap_endian_data(c, data_len, data, 0);
 	return 4 + block_len;
 }
 

Reply via email to