Package: release.debian.org Severity: normal User: release.debian....@packages.debian.org Usertags: unblock
Please unblock package catdoc. This version is an RC bug fix (#692076). A debdiff against the version in testing is attached. The majority of the patch is the fix for #692073, which removes the .pc subdirectory that was inadvertently introduced in the last upstream release and which prevented a sane fix for the RC bug. Thanks. unblock catdoc/0.94.4-1.1 -- System Information: Debian Release: wheezy/sid APT prefers unstable APT policy: (500, 'unstable'), (500, 'stable'), (1, 'experimental') Architecture: amd64 (x86_64) Foreign Architectures: armel i386 armhf Kernel: Linux 3.2.0-3-amd64 (SMP w/4 CPU cores) Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8) (ignored: LC_ALL set to en_GB.UTF-8) Shell: /bin/sh linked to /bin/dash
diffstat for catdoc-0.94.3 catdoc-0.94.4 .pc/.quilt_patches | 1 .pc/.quilt_series | 1 .pc/.version | 1 .pc/applied-patches | 1 .pc/debian-changes-0.94.3-1/doc/catdoc.txt | 258 ---------------------------- .pc/debian-changes-0.94.3-1/doc/catppt.txt | 51 ----- .pc/debian-changes-0.94.3-1/doc/xls2csv.txt | 85 --------- configure | 2 configure.in | 2 debian/changelog | 12 + doc/catdoc.1 | 2 doc/catppt.1 | 2 doc/wordview.1 | 2 doc/xls2csv.1 | 2 src/xlsparse.c | 4 tarball.sh | 18 + 16 files changed, 34 insertions(+), 410 deletions(-) diff -Nru catdoc-0.94.3/configure catdoc-0.94.4/configure --- catdoc-0.94.3/configure 2012-06-10 14:02:08.000000000 +0100 +++ catdoc-0.94.4/configure 2012-12-03 18:01:26.000000000 +0000 @@ -541,7 +541,7 @@ fi -catdoc_version=0.94.2 +catdoc_version=0.94.4 # Extract the first word of "gcc", so it can be a program name with args. set dummy gcc; ac_word=$2 echo $ac_n "checking for $ac_word""... $ac_c" 1>&6 diff -Nru catdoc-0.94.3/configure.in catdoc-0.94.4/configure.in --- catdoc-0.94.3/configure.in 2012-06-10 12:35:25.000000000 +0100 +++ catdoc-0.94.4/configure.in 2012-12-03 18:01:31.000000000 +0000 @@ -1,6 +1,6 @@ dnl Process this file with autoconf to produce a configure script. AC_INIT(acconfig.h) -catdoc_version=0.94.2 +catdoc_version=0.94.4 dnl Checks for programs. AC_PROG_CC case ${CC} in diff -Nru catdoc-0.94.3/debian/changelog catdoc-0.94.4/debian/changelog --- catdoc-0.94.3/debian/changelog 2012-06-10 13:51:32.000000000 +0100 +++ catdoc-0.94.4/debian/changelog 2012-12-03 18:50:42.000000000 +0000 @@ -1,3 +1,15 @@ +catdoc (0.94.4-1.1) unstable; urgency=low + + * Non-maintainer upload. + * New upstream release to remove .pc subdirectory from + the orig tarball (Closes: #692073). Includes updating + version strings in generated manpages. + * Remove extra ';' in src/xlsparse.c which turned for loop in + xlsparse into a buffer overflow (Closes: #692076), applies + patch by Olly Betts <o...@survex.com>. 
+ + -- Neil Williams <codeh...@debian.org> Mon, 03 Dec 2012 18:22:47 +0000 + catdoc (0.94.3-1) unstable; urgency=low * Declare new upstream release diff -Nru catdoc-0.94.3/doc/catdoc.1 catdoc-0.94.4/doc/catdoc.1 --- catdoc-0.94.3/doc/catdoc.1 2012-06-10 14:04:16.000000000 +0100 +++ catdoc-0.94.4/doc/catdoc.1 2012-12-03 18:54:22.000000000 +0000 @@ -1,4 +1,4 @@ -.TH catdoc 1 "Version 0.94.2" "MS-Word reader" +.TH catdoc 1 "Version 0.94.4" "MS-Word reader" .SH NAME catdoc \- reads MS-Word file and puts its content as plain text on standard output .SH SYNOPSIS diff -Nru catdoc-0.94.3/doc/catppt.1 catdoc-0.94.4/doc/catppt.1 --- catdoc-0.94.3/doc/catppt.1 2012-06-10 14:04:16.000000000 +0100 +++ catdoc-0.94.4/doc/catppt.1 2012-12-03 18:54:22.000000000 +0000 @@ -1,4 +1,4 @@ -.TH ppt2text 1 "Version 0.94.2" "MS-PowerPoint reader" +.TH ppt2text 1 "Version 0.94.4" "MS-PowerPoint reader" .SH NAME catppt \- reads MS-PowerPoint file and puts its content on standard output .SH SYNOPSIS diff -Nru catdoc-0.94.3/doc/wordview.1 catdoc-0.94.4/doc/wordview.1 --- catdoc-0.94.3/doc/wordview.1 2012-06-10 14:04:16.000000000 +0100 +++ catdoc-0.94.4/doc/wordview.1 2012-12-03 18:54:22.000000000 +0000 @@ -1,4 +1,4 @@ -.TH wordview 1 "Version 0.94.2" "MS-Word reader" +.TH wordview 1 "Version 0.94.4" "MS-Word reader" .SH NAME wordview \- displays text contained in MS-Word file in X window diff -Nru catdoc-0.94.3/doc/xls2csv.1 catdoc-0.94.4/doc/xls2csv.1 --- catdoc-0.94.3/doc/xls2csv.1 2012-06-10 14:04:16.000000000 +0100 +++ catdoc-0.94.4/doc/xls2csv.1 2012-12-03 18:54:22.000000000 +0000 @@ -1,4 +1,4 @@ -.TH xls2csv 1 "Version 0.94.2" "MS-Word reader" +.TH xls2csv 1 "Version 0.94.4" "MS-Word reader" .SH NAME xls2csv \- reads MS-Excel file and puts its content as comma-separated data on standard output .SH SYNOPSIS diff -Nru catdoc-0.94.3/.pc/applied-patches catdoc-0.94.4/.pc/applied-patches --- catdoc-0.94.3/.pc/applied-patches 2012-06-10 14:26:51.000000000 +0100 +++ 
catdoc-0.94.4/.pc/applied-patches 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ -debian-changes-0.94.3-1 diff -Nru catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/catdoc.txt catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/catdoc.txt --- catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/catdoc.txt 2012-06-10 14:26:51.000000000 +0100 +++ catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/catdoc.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,258 +0,0 @@ -catdoc(1) catdoc(1) - - - -NAME - catdoc - reads MS-Word file and puts its content as plain text on stan‐ - dard output - -SYNOPSIS - catdoc [-vlu8btawxV] [-m number] [ -s charset] [ -d charset] [ -f out‐ - put-format] file - - -DESCRIPTION - catdoc behaves much like cat(1) but it reads MS-Word file and produces - human-readable text on standard output. Optionally it can use latex(1) - escape sequences for characters which have special meaning for LaTeX. - It also makes some effort to recognize MS-Word tables, although it - never tries to write correct headers for LaTeX tabular environment. - Additional output formats, such is HTML can be easily defined. - - catdoc doesn't attempt to extract formatting information other than - tables from MS-Word document, so different output modes means mainly - that different characters should be escaped and different ways used to - represent characters, missing from output charset. See CHARACTER SUB‐ - STITUTION below - - - catdoc uses internal unicode(7) representation of text, so it is able - to convert texts when charset in source document doesn't match charset - on target system. See CHARACTER SETS below. - - If no file names supplied, catdoc processes its standard input unless - it is terminal. It is unlikely that somebody could type Word document - from keyboard, so if catdoc invoked without arguments and stdin is not - redirected, it prints brief usage message and exits. Processing of - standard input (even among other files) can be forced using dash '-' as - file name. 
- - By default, catdoc wraps lines which are more than 72 chars long and - separates paragraphs by blank lines. This behavior can be turned of by - -w switch. In wide mode catdoc prints each paragraph as one long line, - suitable for import into word processors which perform word wrapping. - - - -OPTIONS - -a - shortcut for -f ascii. Produces ASCII text as output. Sepa‐ - rates table columns with TAB - - -b - process broken MS-Word file. Normally, catdoc checks if first - 8 bytes of file is Microsoft OLE signature. If so, it processes - file, otherwise it just copies it to stdin. It is intended to - use catdoc as filter for viewing all files with .doc extension. - - -dcharset - - specifies destination charset name. Charset file has format - described in CHARACTER SETS below and should have .txt exten‐ - sion and reside in catdoc library directory ( ${exec_pre‐ - fix}/lib/catdoc). By default, current locale charset is used if - langinfo support compiled in. - - -fformat - - specifies output format as described in CHARACTER SUBSTITU‐ - TION below. catdoc comes with two output formats - ascii and - tex. You can add your own if you wish. - - -l Causes catdoc to list names of available charsets to the stdout - and exit successfully. - - -mnumber - Specifies right margin for text (default 72). -m 0 is equiva‐ - lent to -w - - -scharset - Specifies source charset. (one used in Word document), if Word - document doesn't contain UTF-16 text. When reading rtf docu‐ - ments, it is typically not necessary, because rtf documents - contain ansicpg specification. But it can be set wrong by Word - (I've seen RTF documents on Russian, where cp1252 was speci‐ - fied). In this case this option would take precedence over - charset, specified in the document. But source_charset state‐ - ment in the configuration file have less priority than charset - in the document. 
- - -t - shortcut for -f tex - converts all printable chars, which have special meaning for - LaTeX(1) into appropriate control sequences. Separates table - columns by &. - - -u - declares that Word document contain UNICODE (UTF-16) - representation of text (as some Word-97 documents). If catdoc - fails to correct Word document with default charset, try - this option. - - -8 - declares is Word document is 8 bit. Just in case that catdoc - recognizes file format incorrectly. - - -w disables word wrapping. By default catdoc output is split into - lines not longer than 72 (or number, specified by -m option) - characters and paragraphs are separated by blank line. With - this option each paragraph is one long line. - - -x causes catdoc to output unknown UNICODE character as \xNNNN, - instead of question marks. - - -v causes catdoc to print some useless information about word doc‐ - ument structure to stdout before actual start of text. - - -V outputs catdoc version - - -CHARACTER SETS - When processing MS-Word file catdoc uses information about two charac‐ - ter sets, typically different - - input and output. They are stored in plain text files in catdoc - data directory. Character set files should contain two whitespace-sepa‐ - rated hexadecimal numbers - 8-bit code in character set and 16-bit Uni‐ - code code. Anything from hash mark to end of line is ignored, as well - as blank lines. - - catdoc distribution includes some of these character sets. Additional - character set definitions, directly usable by catdoc can be obtained - from ftp.unicode.org. Charset files have .txt suffix, which shouldn't - be specified in command-line or configuration files. - - Note that catdoc is distributed with Cyrillic charsets as default. If - you are not Russian, you probably don't want it, an should reconfigure - catdoc at compile time or in runtime configuration file. 
- - When dealing with documents with charsets other than default, remember - that Microsoft never uses ISO charsets. While letters in, say cp1252 - are at the same position as in ISO-8859-1, some punctuation signs would - be lost, if you specify ISO-8859-1 as input charset. If you use cp1252, - catdoc would deal with those signs as described in CHARACTER SUBSTITU‐ - TION below. - - -CHARACTER SUBSTITUTION - catdoc converts MS-Word file into following internal Unicode represen‐ - tation: - - 1. Paragraphs are separated by ASCII Line Feed symbol (0x000A) - - 2. Table cells within row are separated by ASCII Field Separator symbol - (0x001C) - - 3. Table rows are separated by ASCII Record Separator (0x001E) - - 4. All printable characters, including whitespace are represented with - their - respective UNICODE codes. - - This UNICODE representation is subsequently converted into 8-bit text - in target character set using following four-step algorithm: - - 1. List of special characters is searched for given Unicode character. - If found, then appropriate multi-character sequence is output - instead of character. - - 2. If there is an equivalent in target character set, it is output. - - 3. Otherwise, replacement list is searched and, if there is multi-char‐ - acter - substitution for this UNICODE char, it is output. - - 4. If all above fails, "Unknown char" symbol (question mark) is output. - - Lists of special characters and list of substitution are character set- - independent, because special chars should be escaped regardless of - their existence in target character set (usually, they are parts of - US-ASCII, and therefore exist in any character set) and replacement - list is searched only for those characters, which are not found in tar‐ - get character set. - - These lists are stored in catdoc data directory in files with prefix of - format name. 
These files have following format: - - Each line can be either comment (starting with hash mark) or contain - hexadecimal UNICODE value, separated by whitespace from string, which - would be substituted instead of it. If string contain no whitespace it - can be used as is, otherwise it should be enclosed in single or double - quotes. Usual backslash sequences like '\n','\t' can be used in these - string. - - - -RUNTIME CONFIGURATION - Upon startup catdoc reads its system-wide configuration file /etc/cat‐ - docrc and then user-specific configuration file ${HOME}/.catdocrc. - - These files can contain following directives: - - source_charset = charset-name - Sets default source charset, which would be used if no -s - option specified. Consult configuration of nearby windows work‐ - station to find one you need. - - target_charset = charset-name - Sets default output charset. You probably know, which one you - use. - - charset_path = directory-list - colon-separated list of directories, which are searched for - charset files. This allows you to install additional charsets - in your home directory. If first directory component of path - is ~ it is replaced by contents of HOME environment variable. - On MS-DOS platform, if directory name starts with %s, it is - replaced with directory of executable file. Empty element in - list (i.e. two consequitve colons) is considered current direc‐ - tory. - - map_path = directory-list - colon-separated list of directories, which are searched for - special character map and replacement map. Same substitution - rules as in charset_path are applied. - - format = format name - Output format which would be used by default. catdoc comes - with two formats - ascii and tex but nothing prevents you from - writing your own format (set two map files - special character - map and replacement map). 
- - unknown_char = character specification - sets character to output instead of unknown Unicode character - (default '?') Character specification can have one of two form - - character enclosed in single quotes or hexadecimal code. - - use_locale =(yes|no) - Enables or disables automatic selection of output charset - (default yes), - based on system locale settings (if enabled at compile time). - If automatic detection is enabled, than output charset settings - in the configuration files (but not in the command line) are - ignored, and current system locale charset is used instead. - There are no automatic choice of input charset, based of locale - language, because most modern Word files (since Word 97) are - Unicode anyway - - -BUGS - Doesn't handle fast-saves properly. Prints footnotes as separate para‐ - graphs at the end of file, instead of producing correct LaTeX commands. - Cannot distinguish between empty table cell and end of table row. - - - - -SEE ALSO - xls2csv(1), cat(1), strings(1), utf(4), unicode(7) - - -AUTHOR - V.B.Wagner <vi...@45.free.net> - - - -MS-Word reader Version 0.94.2 catdoc(1) diff -Nru catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/catppt.txt catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/catppt.txt --- catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/catppt.txt 2012-06-10 14:26:51.000000000 +0100 +++ catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/catppt.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,51 +0,0 @@ -ppt2text(1) ppt2text(1) - - - -NAME - catppt - reads MS-PowerPoint file and puts its content on standard out‐ - put - -SYNOPSIS - catppt [-lV] [-b string ] [-s charset ] [-d charset ] files - - -DESCRIPTION - catppt reads MS-PowerPoint presentations and dumps its content to std‐ - out. - -OPTIONS - -l list known charsets and exit successfully - - -bstring - slides break string. This string (by default - formfeed) would - be output at the end of each slide page. - - - -dcharset` - - specifies destination charset name. 
Charset file has format - described in CHARACTER SETS section of catdoc(1) manual page. - By default, current locale charset would be used if langinfo - support was enabled at the compile time. - - - -scharset - - specifies source charset. Typically, PowerPoint files use - UNICODE strings with known charsets, but for some reason you - may wish to override it. - - - -V outputs version number - - -SEE ALSO - cat(1), catdoc(1), xls2csv(1), strings(1), utf(4), unicode(4) - - -AUTHOR - Alex Ott <alex...@gmail.com> - - - - -MS-PowerPoint reader Version 0.94.2 ppt2text(1) diff -Nru catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/xls2csv.txt catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/xls2csv.txt --- catdoc-0.94.3/.pc/debian-changes-0.94.3-1/doc/xls2csv.txt 2012-06-10 14:26:51.000000000 +0100 +++ catdoc-0.94.4/.pc/debian-changes-0.94.3-1/doc/xls2csv.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,85 +0,0 @@ -xls2csv(1) xls2csv(1) - - - -NAME - xls2csv - reads MS-Excel file and puts its content as comma-separated - data on standard output - -SYNOPSIS - xls2csv [-xlV] [-f format ] [-b string ] [-s charset ] [-d charset - ] [-q number ] [-c char] files - - -DESCRIPTION - xls2csv reads MS-Excel spreadsheet and dumps its content as comma-sepa‐ - rated values to stdout. Numbers are printed without delimiters, strings - are enclosed in the double quotes. Double-quotes inside string are dou‐ - bled. - -OPTIONS - -x print unknown Unicode chars as \xNNNN, rather than as question - marks - - -l list known charsets and exit successfully - - -cchar cell separator char. By default - comma. - - -bstring - sheet break string. This string (by default - formfeed) would - be output at the end of each workbook page. This string is - printed after page starting at start of line, but no linefeed - would be automatically added at the end of string. 
Include new‐ - line at the ent of sheet separator if you want it to appear on - separate line by itself - - -gnumber number of decimal digits in the numbers. By default maximal - double precision (system-dependent macro DBL_DIG) is used. - - -qnumber - set quote mode. In quote mode 0 cell contents is never quoted. - In quote mode 1 only strings which contain spaces, double - quotes or commas are quoted. In quote mode 2 (default) all - cells with type string are quoted. In quote mode 3 all cells - are quoted. - - - -dcharset` - - specifies destination charset name. Charset file has format - described in CHARACTER SETS section of catdoc(1) manual page. - By default, current locale charset would be used if langinfo - support was enabled at the compile time. - - - -scharset - - specifies source charset. Typically, Excel files have CODE - PAGE record, which denotes input charset, but for some reason - you may wish to override it. - - -fformat - - specifies date/time format to use for output of all Excel - date and time values. If this option is not specified, for‐ - mat, specified in the spreadsheet is used. On POSIX system any - format, allowed by strftime(3) can be used as value of this - option. Under MS-DOS xls2csv implements limited set of strftime - formats, namely m, d, y, Y, b, l, p, H, M, S. 
- - - -V outputs version number - - -FILES - ${HOME}/.catdocrc, catdoc charset files and substitution map files (see - catdoc(1) manual page for details, - - -SEE ALSO - cat(1), catdoc(1), strings(1), utf8(7), unicode(7) - - -AUTHOR - V.B.Wagner <vi...@45.free.net>, based on biffview by David Rysdam - - - -MS-Word reader Version 0.94.2 xls2csv(1) diff -Nru catdoc-0.94.3/.pc/.quilt_patches catdoc-0.94.4/.pc/.quilt_patches --- catdoc-0.94.3/.pc/.quilt_patches 2012-06-10 14:26:51.000000000 +0100 +++ catdoc-0.94.4/.pc/.quilt_patches 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ -debian/patches diff -Nru catdoc-0.94.3/.pc/.quilt_series catdoc-0.94.4/.pc/.quilt_series --- catdoc-0.94.3/.pc/.quilt_series 2012-06-10 14:26:51.000000000 +0100 +++ catdoc-0.94.4/.pc/.quilt_series 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ -series diff -Nru catdoc-0.94.3/.pc/.version catdoc-0.94.4/.pc/.version --- catdoc-0.94.3/.pc/.version 2012-06-10 14:26:51.000000000 +0100 +++ catdoc-0.94.4/.pc/.version 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ -2 diff -Nru catdoc-0.94.3/src/xlsparse.c catdoc-0.94.4/src/xlsparse.c --- catdoc-0.94.3/src/xlsparse.c 2012-06-10 13:47:37.000000000 +0100 +++ catdoc-0.94.4/src/xlsparse.c 2012-12-03 18:03:52.000000000 +0000 @@ -589,8 +589,8 @@ void CleanUpFormatIdxUsed() { int i; - for (i=0;i<NUMOFDATEFORMATS; i++); - FormatIdxUsed[i]=0; + for (i=0;i<NUMOFDATEFORMATS; i++) + FormatIdxUsed[i]=0; } /* diff -Nru catdoc-0.94.3/tarball.sh catdoc-0.94.4/tarball.sh --- catdoc-0.94.3/tarball.sh 2012-06-10 14:02:08.000000000 +0100 +++ catdoc-0.94.4/tarball.sh 2012-12-03 18:49:06.000000000 +0000 @@ -5,8 +5,18 @@ set -e -debclean -cd ../ -tar -czf catdoc-0.94.3.tar.gz ./catdoc-0.94.3 --exclude=debian --exclude=.svn - ln -sf catdoc-0.94.3.tar.gz catdoc_0.94.3.orig.tar.gz +test ! -d .pc + +VERSION=0.94.4 +#debclean +find . 
-name '*.o' -delete +rm -f config.cache config.log config.status Makefile src/catppt src/xls2csv +rm -f doc/Makefile charsets/Makefile build-stamp install-stamp src/catdoc src/wordview src/Makefile +cd ../ +if [ -h catdoc-${VERSION} ]; then + rm catdoc-${VERSION} +fi +ln -s ./wheezy/ catdoc-${VERSION} +tar -czf catdoc-${VERSION}.tar.gz ./catdoc-${VERSION}/* --exclude=debian --exclude=.svn +ln -sf catdoc-${VERSION}.tar.gz catdoc_${VERSION}.orig.tar.gz