// Copyright (c) 2007, Rodrigo Braz Monteiro // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are met: // // * Redistributions of source code must retain the above copyright notice, // this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation // and/or other materials provided with the distribution. // * Neither the name of the Aegisub Group nor the names of its contributors // may be used to endorse or promote products derived from this software // without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE // ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS // INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN // CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) // ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // POSSIBILITY OF SUCH DAMAGE. 
//
// -----------------------------------------------------------------------------
//
// AEGISUB
//
// Website: http://aegisub.cellosoft.com
// Contact: mailto:zeratul@cellosoft.com
//


///////////
// Headers
#ifdef WITH_UNIVCHARDET
// NOTE(review): the angle-bracket include targets were unreadable in the
// reviewed copy of this file. The ones below were chosen to cover the names
// used here (wxGetSingleChoiceIndex, wxLocale, std::list, strlen) -- confirm
// against the original header list.
#include <wx/wxprec.h>
#include "text_file_reader.h"
#include "charset_detect.h"
#include "../universalchardet/nsCharSetProber.h"
#include <wx/choicdlg.h>
#include <wx/intl.h>
#include <cstring>
#include <list>


/////////////////////////////////////////
// One candidate charset and its score
struct CharDetResult {
	float confidence;	// Score reported by the prober; 0 is shown to the user as "Unknown"
	wxString name;		// Charset name

	// Deliberately inverted: "less than" means "more confident", so that
	// std::list::sort() puts the best candidates first.
	bool operator < (const CharDetResult &par) const { return confidence > par.confidence; }
};


////////////////
// Get encoding
// Feeds the contents of filename to the detector and returns the name of the
// charset to use. If more than one candidate was collected, the user is asked
// to choose from a list (best confidence first, at most 20 entries).
// Throws _T("Canceled") if the user dismisses that dialog.
wxString CharSetDetect::GetEncoding(wxString filename) {
	// Open file
	TextFileReader reader(filename,_T("Local"));

	// Loop through it until it finds interesting lines
	while (reader.HasMoreLines() && !done()) {
		wxString line = reader.ReadLineFromFile();
		wxCharBuffer buffer = line.mb_str(wxConvLocal);

		// The detector takes a byte buffer, so hand it the byte length of the
		// converted data rather than the character count of the wxString (the
		// two differ for multi-byte text). Lines that failed to convert yield
		// no buffer and are skipped.
		const char *bytes = buffer;
		if (bytes) HandleData(bytes,strlen(bytes));
	}

	// Flag as finished
	DataEnd();

	// Grab every result obtained
	wxString local = wxLocale::GetSystemEncodingName();
	std::list<CharDetResult> results;
	bool gotLocal = false;
	// NOTE(review): the loop bounds were unreadable in the reviewed copy;
	// NUM_OF_CHARSET_PROBERS is assumed to be the size of mCharSetProbers as
	// declared by the detector base class -- confirm.
	for (int i=0;i<NUM_OF_CHARSET_PROBERS;i++) {
		if (!mCharSetProbers[i]) continue;

		int probes = mCharSetProbers[i]->GetProbeCount();
		for (int j=0;j<probes;j++) {
			float conf = mCharSetProbers[i]->GetConfidence(j);
			wxString curName = wxString(mCharSetProbers[i]->GetCharSetName(j),wxConvUTF8);
			bool isLocal = (curName == local);

			// Only bother with those whose confidence is at least 1%;
			// the local encoding is always kept
			if (conf > 0.01f || isLocal) {
				CharDetResult entry;
				entry.name = curName;
				entry.confidence = conf;
				results.push_back(entry);

				// Remember it so it is not appended a second time below
				if (isLocal) gotLocal = true;
			}
		}
	}

	// If you got more than one valid result, ask the user which he wants
	if (results.size() > 1) {
		// Add local
		if (!gotLocal) {
			CharDetResult entry;
			entry.name = local;
			entry.confidence = 0;
			results.push_back(entry);
		}

		// Sort by confidence
		results.sort();

		// Build the list shown to the user. "picked" holds the bare charset
		// names and "choices" the matching display strings; both are filled in
		// lockstep so that an index into one is valid for the other.
		const size_t maxChoices = 20;
		wxArrayString choices;
		wxArrayString picked;
		for (std::list<CharDetResult>::iterator cur=results.begin();cur!=results.end();++cur) {
			const wxString &name = cur->name;

			// Each charset is listed once, with its best confidence
			if (picked.Index(name) != wxNOT_FOUND) continue;
			picked.Add(name);

			// Generate name
			wxString choiceStr;
			if (cur->confidence > 0.0f) choiceStr = wxString::Format(_T("%f%% - "),cur->confidence*100.0f);
			else choiceStr = _T("Unknown - ");
			choiceStr += name;
			if (name == local) choiceStr += _T(" (local)");

			// Insert
			choices.Add(choiceStr);
			if (choices.GetCount() == maxChoices) break;
		}

		// Get choice from user
		int choice = wxGetSingleChoiceIndex(_("Aegisub could not narrow down the character set to a single one.\nPlease pick one below:"),_("Choose character set"),choices);
		if (choice == -1) throw _T("Canceled");

		// Retrieve name. The index refers to the de-duplicated list shown in
		// the dialog, so it must be looked up in "picked", not in "results"
		// (which may still contain duplicate names).
		result = picked[choice];
	}

	// Return whatever it got
	return result;
}


//////////
// Report
// Stores the charset name handed to this object; GetEncoding() returns it
// unless the user is asked to choose among several candidates.
void CharSetDetect::Report(const char* aCharset) {
	// Store the result reported
	result = wxString(aCharset,wxConvUTF8);
}

#endif // WITH_UNIVCHARDET