summaryrefslogtreecommitdiff
path: root/src/ext/libcharsetdetect/charsetdetect.cpp
blob: 01c13a83e079b35a18a7cbfae6c02571328a5f83 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#include <iostream>
#include "charsetdetect.h"
#include "charsetdetectPriv.h"


//
// C++ API to the character set detector (not exported)
//

// Records aCharset as the detected character set and flags detection as
// finished.
void Detector::Report(const char* aCharset) {
    mDetectedCharset = aCharset;

    // The original detector code does not reliably set mDone on its own.
    // Setting it here keeps that code unmodified while still guaranteeing
    // the flag is raised whenever a charset has been reported.
    mDone = PR_TRUE;
}

// Passes `length` bytes starting at `data` to HandleData.
//
// Returns:
//   -1  HandleData reported NS_ERROR_OUT_OF_MEMORY
//    0  mDone is set, i.e. a charset was detected early
//    1  no conclusion yet; more data is needed
int Detector::Consider(const char *data, int length) {
    const bool outOfMemory =
        (HandleData(data, length) == NS_ERROR_OUT_OF_MEMORY);
    if (outOfMemory) {
        // Errors are signalled with a negative number.
        return -1;
    }

    return mDone ? 0 : 1;
}

// Signals end of input via DataEnd() and returns the resulting charset name.
//
// If a charset was reported (mDone is set), that name is returned. Otherwise
// a fallback is chosen from mInputState: "ibm850" for eEscAscii, "ASCII" for
// ePureAscii, and NULL for anything else.
const char *Detector::Close(void) {
    DataEnd();

    if (mDone) {
        return mDetectedCharset;
    }

    // Nothing was reported; fall back on what kind of input was seen.
    if (mInputState == eEscAscii) {
        return "ibm850";
    }
    if (mInputState == ePureAscii) {
        return "ASCII";
    }

    return NULL;
}

//
// C API to the character set detector (we actually export this)
//

// Allocates a new Detector constructed with NS_FILTER_ALL and returns it as
// an opaque csd_t handle.
csd_t csd_open(void) {
    // TODO: capture exceptions thrown by "new" and return -1 in that case
    // TODO: provide C-land with access to the language filter constructor argument
    Detector *detector = new Detector(NS_FILTER_ALL);
    return detector;
}

// C entry point: treats `csd` as a Detector and forwards `data`/`length` to
// Detector::Consider, returning its result unchanged.
int csd_consider(csd_t csd, const char *data, int length) {
    Detector *detector = (Detector*)csd;
    return detector->Consider(data, length);
}

// C entry point: treats `csd` as a Detector, obtains the result of
// Detector::Close, deletes the Detector, and returns that result.
// The handle must not be used again after this call.
const char *csd_close(csd_t csd) {
    Detector *detector = (Detector*)csd;

    // Fetch the answer before the detector is destroyed.
    // NOTE(review): the returned pointer outlives the Detector, so it must
    // not point into storage owned by it - confirm against Detector.
    const char *charset = detector->Close();
    delete detector;

    return charset;
}

///*
// Variant of Close() that signals end of input via DataEnd2(confidence),
// passing the caller's `confidence` pointer through, and returns the
// resulting charset name.
//
// If a charset was reported (mDone is set), that name is returned. Otherwise
// a fallback is chosen from mInputState: "ibm850" for eEscAscii, "ASCII" for
// ePureAscii, and NULL for anything else.
const char *Detector::Close2(float *confidence) {
    DataEnd2(confidence);

    if (mDone) {
        return mDetectedCharset;
    }

    // Nothing was reported; fall back on what kind of input was seen.
    if (mInputState == eEscAscii) {
        return "ibm850";
    }
    if (mInputState == ePureAscii) {
        return "ASCII";
    }

    return NULL;
}

// C entry point: treats `csd` as a Detector, obtains the result of
// Detector::Close2(confidence), deletes the Detector, and returns that
// result. The handle must not be used again after this call.
const char *csd_close2(csd_t csd,float *confidence) {
    Detector *detector = (Detector*)csd;

    // Fetch the answer before the detector is destroyed.
    // NOTE(review): the returned pointer outlives the Detector, so it must
    // not point into storage owned by it - confirm against Detector.
    const char *charset = detector->Close2(confidence);
    delete detector;

    return charset;
}
//*/