Description: Try to reduce confusion around docx files
Author: Olly Betts
Bug-Debian: https://bugs.debian.org/758959
Forwarded: no
Last-Update: 2014-10-17

--- antiword-0.37.orig/Docs/antiword.1
+++ antiword-0.37/Docs/antiword.1
@@ -14,7 +14,10 @@ documents.
 .br
 A wordfile named - stands for a Word document read from the standard input.
 .br
-Only documents made by MS Word version 2 and version 6 or later are supported.
+Only documents made by MS Word version 2, 6, 7, 97, 2000 and 2003 are
+supported. Newer Word versions default to using a completely different format
+consisting of XML files in a ZIP container (usually with a .docx file
+extension) which antiword doesn't support.
 .SH OPTIONS
 .TP
 .BI "\-a " papersize
--- antiword-0.37.orig/antiword.h
+++ antiword-0.37/antiword.h
@@ -695,6 +695,7 @@ extern int iInitDocumentDOS(FILE *, long
 extern BOOL bIsWordForDosFile(FILE *, long);
 extern BOOL bIsRtfFile(FILE *);
 extern BOOL bIsWordPerfectFile(FILE *);
+extern BOOL bIsZipFile(FILE *);
 extern BOOL bIsWinWord12File(FILE *, long);
 extern BOOL bIsMacWord45File(FILE *);
 extern int iGuessVersionNumber(FILE *, long);
--- antiword-0.37.orig/main_u.c
+++ antiword-0.37/main_u.c
@@ -187,10 +187,17 @@ bProcessFile(const char *szFilename)
 			werr(0, "%s is not a Word Document."
 				" It is probably a Rich Text Format file",
 				szFilename);
-		} if (bIsWordPerfectFile(pFile)) {
+		} else if (bIsWordPerfectFile(pFile)) {
 			werr(0, "%s is not a Word Document."
 				" It is probably a Word Perfect file",
 				szFilename);
+		} else if (bIsZipFile(pFile)) {
+			werr(0, "%s is not a Word Document."
+				" It is probably an OpenDocument file or"
+				" a new-style Microsoft Office XML file"
+				" (antiword only handles documents from"
+				" MS Word 2003 and earlier)",
+				szFilename);
 		} else {
 #if defined(__dos)
 			werr(0, "%s is not a Word Document or the filename"
--- antiword-0.37.orig/wordlib.c
+++ antiword-0.37/wordlib.c
@@ -131,6 +131,20 @@ bIsWordPerfectFile(FILE *pFile)
 } /* end of bIsWordPerfectFile */
 
 /*
+ * This function checks whether the given file is or is not a ZIP file
+ */
+BOOL
+bIsZipFile(FILE *pFile)
+{
+	static UCHAR aucBytes[] =
+		{ 'P', 'K', 0x03, 0x04 };
+
+	DBG_MSG("bIsZipFile");
+
+	return bCheckBytes(pFile, aucBytes, elementsof(aucBytes));
+} /* end of bIsZipFile */
+
+/*
  * This function checks whether the given file is or is not a "Win Word 1 or 2"
  * document
  */