2000-05-07 22:23:41 +02:00
|
|
|
/*
|
|
|
|
* text-writer -- RTF-to-text translation writer code.
|
|
|
|
*
|
|
|
|
* Read RTF input, write text of document (text extraction).
|
|
|
|
*
|
|
|
|
* Wrapper must call WriterInit() once before processing any files,
|
|
|
|
* then set up input and call BeginFile() for each input file.
|
|
|
|
*
|
|
|
|
* This installs callbacks for the text and control token classes.
|
|
|
|
* The control class is necessary so that special characters such as
|
|
|
|
* \par, \tab, \sect, etc. can be converted.
|
|
|
|
*
|
|
|
|
* It's problematic what to do with text in headers and footers, and
|
|
|
|
* what to do about tables.
|
|
|
|
*
|
|
|
|
* This really is quite a stupid program, for instance, it could keep
|
|
|
|
* track of the current leader character and dump that out when a tab
|
|
|
|
* is encountered.
|
|
|
|
*
|
|
|
|
* 04 Feb 91 Paul DuBois dubois@primate.wisc.edu
|
|
|
|
*
|
|
|
|
* This software may be redistributed without restriction and used for
|
|
|
|
* any purpose whatsoever.
|
|
|
|
*
|
|
|
|
* 04 Feb 91
|
|
|
|
* - Created.
|
|
|
|
* 27 Feb 91
|
|
|
|
* - Updated for distribution 1.05.
|
|
|
|
* 13 Jul 93
|
|
|
|
* - Updated to compile under THINK C 6.0.
|
|
|
|
* 31 Aug 93
|
|
|
|
* - Added Mike Sendall's entries for Macintosh char map.
|
|
|
|
* 07 Sep 93
|
|
|
|
* - Uses charset map and output sequence map for character translation.
|
|
|
|
* 11 Mar 94
|
|
|
|
* - Updated for 1.10 distribution.
|
|
|
|
*/
|
|
|
|
|
2002-03-10 00:29:33 +01:00
|
|
|
#include <stdio.h>
|
2000-05-07 22:23:41 +02:00
|
|
|
|
2002-03-10 00:29:33 +01:00
|
|
|
#include "rtf.h"
|
|
|
|
#include "rtf2text.h"
|
|
|
|
#include "wine/debug.h"
|
2002-01-14 20:44:07 +01:00
|
|
|
|
2002-03-10 00:29:33 +01:00
|
|
|
/* Declare "richedit" as the default Wine debug channel for this file;
 * the TRACE() calls below log through it. */
WINE_DEFAULT_DEBUG_CHANNEL(richedit);
|
2000-05-07 22:23:41 +02:00
|
|
|
|
2004-01-30 23:56:33 +01:00
|
|
|
/* Forward declarations: token-class callbacks ... */
static void TextClass    (RTF_Info *info);
static void ControlClass (RTF_Info *info);
static void Destination  (RTF_Info *info);
static void SpecialChar  (RTF_Info *info);

/* ... and the output helpers they write through. */
static void PutStdChar (RTF_Info *info, int stdCode);
static void PutLitChar (RTF_Info *info, int c);
static void PutLitStr  (RTF_Info *info, char *str);
|
|
|
|
|
2000-05-07 22:23:41 +02:00
|
|
|
/*
|
|
|
|
* Initialize the writer.
|
|
|
|
*/
|
|
|
|
|
|
|
|
void
|
2004-01-30 23:56:33 +01:00
|
|
|
WriterInit (RTF_Info *info )
|
2000-05-07 22:23:41 +02:00
|
|
|
{
|
2004-01-30 23:56:33 +01:00
|
|
|
RTFReadOutputMap (info, info->outMap,1);
|
2000-05-07 22:23:41 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
int
|
2004-01-30 23:56:33 +01:00
|
|
|
BeginFile (RTF_Info *info )
|
2000-05-07 22:23:41 +02:00
|
|
|
{
|
|
|
|
/* install class callbacks */
|
|
|
|
|
2004-01-30 23:56:33 +01:00
|
|
|
RTFSetClassCallback (info, rtfText, TextClass);
|
|
|
|
RTFSetClassCallback (info, rtfControl, ControlClass);
|
2000-05-07 22:23:41 +02:00
|
|
|
|
|
|
|
return (1);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Write out a character. rtfMajor contains the input character, rtfMinor
|
|
|
|
* contains the corresponding standard character code.
|
|
|
|
*
|
|
|
|
* If the input character isn't in the charset map, try to print some
|
|
|
|
* representation of it.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void
|
2004-01-30 23:56:33 +01:00
|
|
|
TextClass (RTF_Info *info)
|
2000-05-07 22:23:41 +02:00
|
|
|
{
|
|
|
|
char buf[rtfBufSiz];
|
|
|
|
|
2002-06-01 01:06:46 +02:00
|
|
|
TRACE("\n");
|
2002-08-29 01:43:43 +02:00
|
|
|
|
2004-01-30 23:56:33 +01:00
|
|
|
if (info->rtfFormat == SF_TEXT)
|
|
|
|
PutLitChar (info, info->rtfMajor);
|
|
|
|
else if (info->rtfMinor != rtfSC_nothing)
|
|
|
|
PutStdChar (info, info->rtfMinor);
|
2000-05-07 22:23:41 +02:00
|
|
|
else
|
|
|
|
{
|
2004-01-30 23:56:33 +01:00
|
|
|
if (info->rtfMajor < 128) /* in ASCII range */
|
|
|
|
sprintf (buf, "[[%c]]", info->rtfMajor);
|
2000-05-07 22:23:41 +02:00
|
|
|
else
|
2004-01-30 23:56:33 +01:00
|
|
|
sprintf (buf, "[[\\'%02x]]", info->rtfMajor);
|
|
|
|
PutLitStr (info, buf);
|
2000-05-07 22:23:41 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
2004-01-30 23:56:33 +01:00
|
|
|
ControlClass (RTF_Info *info)
|
2000-05-07 22:23:41 +02:00
|
|
|
{
|
2002-01-14 20:44:07 +01:00
|
|
|
TRACE("\n");
|
2004-01-30 23:56:33 +01:00
|
|
|
switch (info->rtfMajor)
|
2000-05-07 22:23:41 +02:00
|
|
|
{
|
|
|
|
case rtfDestination:
|
2004-01-30 23:56:33 +01:00
|
|
|
Destination (info);
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
case rtfSpecialChar:
|
2004-01-30 23:56:33 +01:00
|
|
|
SpecialChar (info);
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This function notices destinations that should be ignored
|
|
|
|
* and skips to their ends. This keeps, for instance, picture
|
|
|
|
* data from being considered as plain text.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static void
|
2004-01-30 23:56:33 +01:00
|
|
|
Destination (RTF_Info *info)
|
2000-05-07 22:23:41 +02:00
|
|
|
{
|
2002-01-14 20:44:07 +01:00
|
|
|
|
|
|
|
TRACE("\n");
|
|
|
|
|
2004-01-30 23:56:33 +01:00
|
|
|
switch (info->rtfMinor)
|
2000-05-07 22:23:41 +02:00
|
|
|
{
|
|
|
|
case rtfPict:
|
|
|
|
case rtfFNContSep:
|
|
|
|
case rtfFNContNotice:
|
|
|
|
case rtfInfo:
|
|
|
|
case rtfIndexRange:
|
|
|
|
case rtfITitle:
|
|
|
|
case rtfISubject:
|
|
|
|
case rtfIAuthor:
|
|
|
|
case rtfIOperator:
|
|
|
|
case rtfIKeywords:
|
|
|
|
case rtfIComment:
|
|
|
|
case rtfIVersion:
|
|
|
|
case rtfIDoccomm:
|
2004-01-30 23:56:33 +01:00
|
|
|
RTFSkipGroup (info);
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* The reason these use the rtfSC_xxx thingies instead of just writing
|
|
|
|
* out ' ', '-', '"', etc., is so that the mapping for these characters
|
|
|
|
* can be controlled by the text-map file.
|
|
|
|
*/
|
|
|
|
|
2004-01-30 23:56:33 +01:00
|
|
|
void SpecialChar (RTF_Info *info)
|
2000-05-07 22:23:41 +02:00
|
|
|
{
|
2002-01-14 20:44:07 +01:00
|
|
|
|
|
|
|
TRACE("\n");
|
|
|
|
|
2004-01-30 23:56:33 +01:00
|
|
|
switch (info->rtfMinor)
|
2000-05-07 22:23:41 +02:00
|
|
|
{
|
|
|
|
case rtfPage:
|
|
|
|
case rtfSect:
|
|
|
|
case rtfRow:
|
|
|
|
case rtfLine:
|
|
|
|
case rtfPar:
|
2004-01-30 23:56:33 +01:00
|
|
|
PutLitChar (info, '\n');
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
case rtfCell:
|
2004-01-30 23:56:33 +01:00
|
|
|
PutStdChar (info, rtfSC_space); /* make sure cells are separated */
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
case rtfNoBrkSpace:
|
2004-01-30 23:56:33 +01:00
|
|
|
PutStdChar (info, rtfSC_nobrkspace);
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
case rtfTab:
|
2004-01-30 23:56:33 +01:00
|
|
|
PutLitChar (info, '\t');
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
case rtfNoBrkHyphen:
|
2004-01-30 23:56:33 +01:00
|
|
|
PutStdChar (info, rtfSC_nobrkhyphen);
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
case rtfBullet:
|
2004-01-30 23:56:33 +01:00
|
|
|
PutStdChar (info, rtfSC_bullet);
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
case rtfEmDash:
|
2004-01-30 23:56:33 +01:00
|
|
|
PutStdChar (info, rtfSC_emdash);
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
case rtfEnDash:
|
2004-01-30 23:56:33 +01:00
|
|
|
PutStdChar (info, rtfSC_endash);
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
case rtfLQuote:
|
2004-01-30 23:56:33 +01:00
|
|
|
PutStdChar (info, rtfSC_quoteleft);
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
case rtfRQuote:
|
2004-01-30 23:56:33 +01:00
|
|
|
PutStdChar (info, rtfSC_quoteright);
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
case rtfLDblQuote:
|
2004-01-30 23:56:33 +01:00
|
|
|
PutStdChar (info, rtfSC_quotedblleft);
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
case rtfRDblQuote:
|
2004-01-30 23:56:33 +01:00
|
|
|
PutStdChar (info, rtfSC_quotedblright);
|
2000-05-07 22:23:41 +02:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Eventually this should keep track of the destination of the
|
|
|
|
* current state and only write text when in the initial state.
|
|
|
|
*
|
|
|
|
* If the output sequence is unspecified in the output map, write
|
|
|
|
* the character's standard name instead. This makes map deficiencies
|
|
|
|
* obvious and provides incentive to fix it. :-)
|
|
|
|
*/
|
|
|
|
|
2004-01-30 23:56:33 +01:00
|
|
|
void PutStdChar (RTF_Info *info, int stdCode)
|
2000-05-07 22:23:41 +02:00
|
|
|
{
|
2002-06-01 01:06:46 +02:00
|
|
|
|
2000-05-07 22:23:41 +02:00
|
|
|
char *oStr = (char *) NULL;
|
|
|
|
char buf[rtfBufSiz];
|
2002-06-01 01:06:46 +02:00
|
|
|
|
2000-05-07 22:23:41 +02:00
|
|
|
/* if (stdCode == rtfSC_nothing)
|
|
|
|
RTFPanic ("Unknown character code, logic error\n");
|
2002-06-01 01:06:46 +02:00
|
|
|
*/
|
2002-01-14 20:44:07 +01:00
|
|
|
TRACE("\n");
|
|
|
|
|
2004-01-30 23:56:33 +01:00
|
|
|
oStr = info->outMap[stdCode];
|
2000-05-07 22:23:41 +02:00
|
|
|
if (oStr == (char *) NULL) /* no output sequence in map */
|
|
|
|
{
|
2004-01-30 23:56:33 +01:00
|
|
|
sprintf (buf, "[[%s]]", RTFStdCharName (info, stdCode));
|
2000-05-07 22:23:41 +02:00
|
|
|
oStr = buf;
|
|
|
|
}
|
2004-01-30 23:56:33 +01:00
|
|
|
PutLitStr (info, oStr);
|
2000-05-07 22:23:41 +02:00
|
|
|
}
|
|
|
|
|
2004-01-30 23:56:33 +01:00
|
|
|
/*
 * Append a single character to the pending output.
 *
 * The last byte of info->OutputBuffer is kept free for the terminator
 * that RTFFlushOutputBuffer() writes, so the buffer is flushed first
 * once only that byte remains.
 */
void PutLitChar (RTF_Info *info, int c)
{
    if( info->dwOutputCount >= ( sizeof info->OutputBuffer - 1 ) )
    {
        RTFFlushOutputBuffer( info );
    }

    info->OutputBuffer[info->dwOutputCount] = c;
    info->dwOutputCount++;
}
|
|
|
|
|
2004-08-27 21:15:08 +02:00
|
|
|
/*
 * Send the pending output to the edit control and empty the buffer.
 *
 * The buffered bytes are NUL-terminated in place, passed to
 * info->hwndEdit with EM_REPLACESEL (wParam FALSE), and the count of
 * pending bytes is reset to zero.
 */
void RTFFlushOutputBuffer( RTF_Info *info )
{
    info->OutputBuffer[info->dwOutputCount] = '\0';

    SendMessageA( info->hwndEdit, EM_REPLACESEL, FALSE,
                  (LPARAM) info->OutputBuffer );

    info->dwOutputCount = 0;
}
|
2000-05-07 22:23:41 +02:00
|
|
|
|
2004-08-27 21:15:08 +02:00
|
|
|
/*
 * Append a NUL-terminated string to the pending output.
 *
 * If the string plus a terminator does not fit in the space left, the
 * buffer is flushed first.  A string that cannot fit even in an empty
 * buffer is sent straight to the edit control with EM_REPLACESEL;
 * anything else is copied into the buffer for a later flush.
 */
static void PutLitStr (RTF_Info *info, char *str )
{
    int n = strlen( str );

    /* not enough room behind what is already pending: drain it first */
    if( ( n + info->dwOutputCount + 1 ) > sizeof info->OutputBuffer )
    {
        RTFFlushOutputBuffer( info );
    }

    if( ( n + 1 ) < sizeof info->OutputBuffer )
    {
        strcpy( info->OutputBuffer + info->dwOutputCount, str );
        info->dwOutputCount += n;
    }
    else
    {
        /* too long to buffer at all: bypass the buffer */
        SendMessageA( info->hwndEdit, EM_REPLACESEL, FALSE, (LPARAM) str );
    }
}
|