diff options
Diffstat (limited to 'src/ext/libcharsetdetect/charsetdetect.cpp')
-rw-r--r-- | src/ext/libcharsetdetect/charsetdetect.cpp | 94 |
1 files changed, 94 insertions, 0 deletions
diff --git a/src/ext/libcharsetdetect/charsetdetect.cpp b/src/ext/libcharsetdetect/charsetdetect.cpp new file mode 100644 index 0000000..01c13a8 --- /dev/null +++ b/src/ext/libcharsetdetect/charsetdetect.cpp @@ -0,0 +1,94 @@ +#include <iostream> +#include "charsetdetect.h" +#include "charsetdetectPriv.h" + + +// +// C++ API to the character set detector (not exported) +// + +void Detector::Report(const char* aCharset) { + // mDone has to be set true here because the original code + // does not always set it, and I am trying to avoid modifying + // the original code. + mDone = PR_TRUE; + + mDetectedCharset = aCharset; +} + +int Detector::Consider(const char *data, int length) { + if (HandleData(data, length) == NS_ERROR_OUT_OF_MEMORY) { + // Error, signal with a negative number + return -1; + } + + if (mDone) { + // Detected early + return 0; + } + + // Need more data! + return 1; +} + +const char *Detector::Close(void) { + DataEnd(); + + if (!mDone) { + if (mInputState == eEscAscii) { + return "ibm850"; + } + else if (mInputState == ePureAscii) { + return "ASCII"; + } + + return NULL; + } + + return mDetectedCharset; +} + +// +// C API to the character set detector (we actually export this) +// + +csd_t csd_open(void) { + // TODO: capture exceptions thrown by "new" and return -1 in that case + // TODO: provide C-land with access to the language filter constructor argument + return new Detector(NS_FILTER_ALL); +} + +int csd_consider(csd_t csd, const char *data, int length) { + return ((Detector*)csd)->Consider(data, length); +} + +const char *csd_close(csd_t csd) { + const char *result = ((Detector*)csd)->Close(); + delete ((Detector*)csd); + return result; +} + +///* +const char *Detector::Close2(float *confidence) { + DataEnd2(confidence); + + if (!mDone) { + if (mInputState == eEscAscii) { + return "ibm850"; + } + else if (mInputState == ePureAscii) { + return "ASCII"; + } + + return NULL; + } + + return mDetectedCharset; +} + +const char *csd_close2(csd_t csd,float *confidence) { + const char *result = ((Detector*)csd)->Close2(confidence); + delete ((Detector*)csd); + return result; +} +//*/
\ No newline at end of file |