/* * Copyright 1999-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * $Id: TransService.cpp 176382 2005-04-21 09:05:57Z cargilld $ */ // --------------------------------------------------------------------------- // Includes // --------------------------------------------------------------------------- #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include XERCES_CPP_NAMESPACE_BEGIN // --------------------------------------------------------------------------- // Local, static data // // gStrictIANAEncoding // A flag to control whether strict IANA encoding names checking should // be done // // --------------------------------------------------------------------------- static bool gStrictIANAEncoding = false; RefHashTableOf* XMLTransService::gMappings = 0; RefVectorOf * XMLTransService::gMappingsRecognizer = 0; static XMLRegisterCleanup mappingsCleanup; static XMLRegisterCleanup mappingsRecognizerCleanup; // ----------------------------------------------------------------------- // Notification that lazy data has been deleted // ----------------------------------------------------------------------- void XMLTransService::reinitMappings() { delete gMappings; // The contents of the gMappings hash table are owned by gMappings = 0; // the it, and so will be deleted by gMapping's destructor. } void XMLTransService::reinitMappingsRecognizer() { delete XMLTransService::gMappingsRecognizer; gMappingsRecognizer = 0; } // --------------------------------------------------------------------------- // XMLTransService: Constructors and destructor // --------------------------------------------------------------------------- XMLTransService::XMLTransService() { if (!gMappings) { RefHashTableOf* t = new RefHashTableOf(103); if (XMLPlatformUtils::compareAndSwap((void **)&gMappings, t, 0) != 0) { delete t; } else { mappingsCleanup.registerCleanup(reinitMappings); } } if (!gMappingsRecognizer) { RefVectorOf* t = new RefVectorOf(XMLRecognizer::Encodings_Count); if (XMLPlatformUtils::compareAndSwap((void **)&gMappingsRecognizer, t, 0) != 0) { delete t; } else { mappingsRecognizerCleanup.registerCleanup(reinitMappingsRecognizer); } } } XMLTransService::~XMLTransService() { } // --------------------------------------------------------------------------- // Allow user specific encodings to be added to the mappings table. // Should be called after platform init // --------------------------------------------------------------------------- void XMLTransService::addEncoding(const XMLCh* const encoding, ENameMap* const ownMapping) { gMappings->put((void *) encoding, ownMapping); } // --------------------------------------------------------------------------- // XMLTransService: Non-virtual API // --------------------------------------------------------------------------- XMLTranscoder* XMLTransService::makeNewTranscoderFor( const char* const encodingName , XMLTransService::Codes& resValue , const unsigned int blockSize , MemoryManager* const manager) { XMLCh* tmpName = XMLString::transcode(encodingName, manager); ArrayJanitor janName(tmpName, manager); return makeNewTranscoderFor(tmpName, resValue, blockSize, manager); } XMLTranscoder* XMLTransService::makeNewTranscoderFor( const XMLCh* const encodingName , XMLTransService::Codes& resValue , const unsigned int blockSize , MemoryManager* const manager) { // // If strict IANA encoding flag is set, validate encoding name // if (gStrictIANAEncoding) { if (!EncodingValidator::instance()->isValidEncoding(encodingName)) { resValue = XMLTransService::UnsupportedEncoding; return 0; } } // // First try to find it in our list of mappings to intrinsically // supported encodings. We have to upper case the passed encoding // name because we use a hash table and we stored all our mappings // in all uppercase. // const unsigned int bufSize = 2048; XMLCh upBuf[bufSize + 1]; if (!XMLString::copyNString(upBuf, encodingName, bufSize)) { resValue = XMLTransService::InternalFailure; return 0; } XMLString::upperCaseASCII(upBuf); ENameMap* ourMapping = gMappings->get(upBuf); // If we found it, then call the factory method for it if (ourMapping) { XMLTranscoder* temp = ourMapping->makeNew(blockSize, manager); resValue = temp ? XMLTransService::Ok : XMLTransService::InternalFailure; return temp; } // // It wasn't an intrinsic and it wasn't disallowed, so pass it on // to the trans service to see if he can make anything of it. // XMLTranscoder* temp = makeNewXMLTranscoder(encodingName, resValue, blockSize, manager); // if successful, set resValue to OK // if failed, the makeNewXMLTranscoder has already set the proper failing resValue if (temp) resValue = XMLTransService::Ok; return temp; } XMLTranscoder* XMLTransService::makeNewTranscoderFor( XMLRecognizer::Encodings encodingEnum , XMLTransService::Codes& resValue , const unsigned int blockSize , MemoryManager* const manager) { // // We can only make transcoder if the passed encodingEnum is under this range // if (encodingEnum < XMLRecognizer::Encodings_Min || encodingEnum > XMLRecognizer::Encodings_Max) { resValue = XMLTransService::InternalFailure; return 0; } ENameMap* ourMapping = gMappingsRecognizer->elementAt(encodingEnum); // If we found it, then call the factory method for it if (ourMapping) { XMLTranscoder* temp = ourMapping->makeNew(blockSize, manager); resValue = temp ? XMLTransService::Ok : XMLTransService::InternalFailure; return temp; } else { XMLTranscoder* temp = makeNewXMLTranscoder(XMLRecognizer::nameForEncoding(encodingEnum, manager), resValue, blockSize, manager); // if successful, set resValue to OK // if failed, the makeNewXMLTranscoder has already set the proper failing resValue if (temp) resValue = XMLTransService::Ok; return temp; } } // --------------------------------------------------------------------------- // XMLTransTransService: Hidden Init Method // // This is called by platform utils during startup. // --------------------------------------------------------------------------- void XMLTransService::initTransService() { // // A stupid way to increment the fCurCount inside the RefVectorOf // for (unsigned int i = 0; i < XMLRecognizer::Encodings_Count; i++) gMappingsRecognizer->addElement(0); // // Add in the magical mapping for the native XMLCh transcoder. This // is used for internal entities. // gMappingsRecognizer->setElementAt(new ENameMapFor(XMLUni::fgXMLChEncodingString), XMLRecognizer::XERCES_XMLCH); gMappings->put((void*)XMLUni::fgXMLChEncodingString, new ENameMapFor(XMLUni::fgXMLChEncodingString)); // // Add in our mappings for ASCII. // gMappingsRecognizer->setElementAt(new ENameMapFor(XMLUni::fgUSASCIIEncodingString), XMLRecognizer::US_ASCII); gMappings->put((void*)XMLUni::fgUSASCIIEncodingString, new ENameMapFor(XMLUni::fgUSASCIIEncodingString)); gMappings->put((void*)XMLUni::fgUSASCIIEncodingString2, new ENameMapFor(XMLUni::fgUSASCIIEncodingString2)); gMappings->put((void*)XMLUni::fgUSASCIIEncodingString3, new ENameMapFor(XMLUni::fgUSASCIIEncodingString3)); gMappings->put((void*)XMLUni::fgUSASCIIEncodingString4, new ENameMapFor(XMLUni::fgUSASCIIEncodingString4)); // // Add in our mappings for UTF-8 // gMappingsRecognizer->setElementAt(new ENameMapFor(XMLUni::fgUTF8EncodingString), XMLRecognizer::UTF_8); gMappings->put((void*)XMLUni::fgUTF8EncodingString, new ENameMapFor(XMLUni::fgUTF8EncodingString)); gMappings->put((void*)XMLUni::fgUTF8EncodingString2, new ENameMapFor(XMLUni::fgUTF8EncodingString2)); // // Add in our mappings for Latin1 // gMappings->put((void*)XMLUni::fgISO88591EncodingString, new ENameMapFor(XMLUni::fgISO88591EncodingString)); gMappings->put((void*)XMLUni::fgISO88591EncodingString2, new ENameMapFor(XMLUni::fgISO88591EncodingString2)); gMappings->put((void*)XMLUni::fgISO88591EncodingString3, new ENameMapFor(XMLUni::fgISO88591EncodingString3)); gMappings->put((void*)XMLUni::fgISO88591EncodingString4, new ENameMapFor(XMLUni::fgISO88591EncodingString4)); gMappings->put((void*)XMLUni::fgISO88591EncodingString5, new ENameMapFor(XMLUni::fgISO88591EncodingString5)); gMappings->put((void*)XMLUni::fgISO88591EncodingString6, new ENameMapFor(XMLUni::fgISO88591EncodingString6)); gMappings->put((void*)XMLUni::fgISO88591EncodingString7, new ENameMapFor(XMLUni::fgISO88591EncodingString7)); gMappings->put((void*)XMLUni::fgISO88591EncodingString8, new ENameMapFor(XMLUni::fgISO88591EncodingString8)); gMappings->put((void*)XMLUni::fgISO88591EncodingString9, new ENameMapFor(XMLUni::fgISO88591EncodingString9)); gMappings->put((void*)XMLUni::fgISO88591EncodingString10, new ENameMapFor(XMLUni::fgISO88591EncodingString10)); gMappings->put((void*)XMLUni::fgISO88591EncodingString11, new ENameMapFor(XMLUni::fgISO88591EncodingString11)); gMappings->put((void*)XMLUni::fgISO88591EncodingString12, new ENameMapFor(XMLUni::fgISO88591EncodingString12)); // // Add in our mappings for UTF-16 and UCS-4, little endian // bool swapped = false; #if defined(ENDIANMODE_BIG) swapped = true; #endif gMappingsRecognizer->setElementAt(new EEndianNameMapFor(XMLUni::fgUTF16LEncodingString, swapped), XMLRecognizer::UTF_16L); gMappings->put ( (void*)XMLUni::fgUTF16LEncodingString, new EEndianNameMapFor ( XMLUni::fgUTF16LEncodingString , swapped ) ); gMappings->put ( (void*)XMLUni::fgUTF16LEncodingString2, new EEndianNameMapFor ( XMLUni::fgUTF16LEncodingString2 , swapped ) ); gMappingsRecognizer->setElementAt(new EEndianNameMapFor(XMLUni::fgUCS4LEncodingString, swapped), XMLRecognizer::UCS_4L); gMappings->put ( (void*)XMLUni::fgUCS4LEncodingString, new EEndianNameMapFor ( XMLUni::fgUCS4LEncodingString , swapped ) ); gMappings->put ( (void*)XMLUni::fgUCS4LEncodingString2, new EEndianNameMapFor ( XMLUni::fgUCS4LEncodingString2 , swapped ) ); // // Add in our mappings for UTF-16 and UCS-4, big endian // swapped = false; #if defined(ENDIANMODE_LITTLE) swapped = true; #endif gMappingsRecognizer->setElementAt(new EEndianNameMapFor(XMLUni::fgUTF16BEncodingString, swapped), XMLRecognizer::UTF_16B); gMappings->put ( (void*)XMLUni::fgUTF16BEncodingString, new EEndianNameMapFor ( XMLUni::fgUTF16BEncodingString , swapped ) ); gMappings->put ( (void*)XMLUni::fgUTF16BEncodingString2, new EEndianNameMapFor ( XMLUni::fgUTF16BEncodingString2 , swapped ) ); gMappingsRecognizer->setElementAt(new EEndianNameMapFor(XMLUni::fgUCS4BEncodingString, swapped), XMLRecognizer::UCS_4B); gMappings->put ( (void*)XMLUni::fgUCS4BEncodingString, new EEndianNameMapFor ( XMLUni::fgUCS4BEncodingString , swapped ) ); gMappings->put ( (void*)XMLUni::fgUCS4BEncodingString2, new EEndianNameMapFor ( XMLUni::fgUCS4BEncodingString2 , swapped ) ); // // Add in our mappings for UTF-16 and UCS-4 which does not indicate endian // assumes the same endian encoding as the OS // gMappings->put ( (void*)XMLUni::fgUTF16EncodingString, new EEndianNameMapFor ( XMLUni::fgUTF16EncodingString , false ) ); gMappings->put ( (void*)XMLUni::fgUTF16EncodingString2, new EEndianNameMapFor ( XMLUni::fgUTF16EncodingString2 , false ) ); gMappings->put ( (void*)XMLUni::fgUTF16EncodingString3, new EEndianNameMapFor ( XMLUni::fgUTF16EncodingString3 , false ) ); gMappings->put ( (void*)XMLUni::fgUTF16EncodingString4, new EEndianNameMapFor ( XMLUni::fgUTF16EncodingString4 , false ) ); gMappings->put ( (void*)XMLUni::fgUTF16EncodingString5, new EEndianNameMapFor ( XMLUni::fgUTF16EncodingString5 , false ) ); gMappings->put ( (void*)XMLUni::fgUTF16EncodingString6, new EEndianNameMapFor ( XMLUni::fgUTF16EncodingString6 , false ) ); gMappings->put ( (void*)XMLUni::fgUTF16EncodingString7, new EEndianNameMapFor ( XMLUni::fgUTF16EncodingString7 , false ) ); gMappings->put ( (void*)XMLUni::fgUCS4EncodingString, new EEndianNameMapFor ( XMLUni::fgUCS4EncodingString , false ) ); gMappings->put ( (void*)XMLUni::fgUCS4EncodingString2, new EEndianNameMapFor ( XMLUni::fgUCS4EncodingString2 , false ) ); gMappings->put ( (void*)XMLUni::fgUCS4EncodingString3, new EEndianNameMapFor ( XMLUni::fgUCS4EncodingString3 , false ) ); // // Add in our mappings for IBM037, and the one alias we support for // it, which is EBCDIC-CP-US. // gMappingsRecognizer->setElementAt(new ENameMapFor(XMLUni::fgEBCDICEncodingString), XMLRecognizer::EBCDIC); gMappings->put((void*)XMLUni::fgIBM037EncodingString, new ENameMapFor(XMLUni::fgIBM037EncodingString)); gMappings->put((void*)XMLUni::fgIBM037EncodingString2, new ENameMapFor(XMLUni::fgIBM037EncodingString2)); //hhe gMappings->put((void*)XMLUni::fgIBM1047EncodingString, new ENameMapFor(XMLUni::fgIBM1047EncodingString)); gMappings->put((void*)XMLUni::fgIBM1047EncodingString2, new ENameMapFor(XMLUni::fgIBM1047EncodingString2)); // // Add in our mappings for IBM037 with Euro update, i.e. IBM1140. It // has alias IBM01140, the one suggested by IANA // gMappings->put((void*)XMLUni::fgIBM1140EncodingString, new ENameMapFor(XMLUni::fgIBM1140EncodingString)); gMappings->put((void*)XMLUni::fgIBM1140EncodingString2, new ENameMapFor(XMLUni::fgIBM1140EncodingString2)); gMappings->put((void*)XMLUni::fgIBM1140EncodingString3, new ENameMapFor(XMLUni::fgIBM1140EncodingString3)); gMappings->put((void*)XMLUni::fgIBM1140EncodingString4, new ENameMapFor(XMLUni::fgIBM1140EncodingString4)); // // Add in our mappings for Windows-1252. We don't have any aliases for // this one, so there is just one mapping. // gMappings->put((void*)XMLUni::fgWin1252EncodingString, new ENameMapFor(XMLUni::fgWin1252EncodingString)); } // --------------------------------------------------------------------------- // XMLTransService: IANA encoding setting // --------------------------------------------------------------------------- void XMLTransService::strictIANAEncoding(const bool newState) { gStrictIANAEncoding = newState; } bool XMLTransService::isStrictIANAEncoding() { return gStrictIANAEncoding; } // --------------------------------------------------------------------------- // XMLTranscoder: Public Destructor // --------------------------------------------------------------------------- XMLTranscoder::~XMLTranscoder() { fMemoryManager->deallocate(fEncodingName);//delete [] fEncodingName; } // --------------------------------------------------------------------------- // XMLTranscoder: Hidden Constructors // --------------------------------------------------------------------------- XMLTranscoder::XMLTranscoder(const XMLCh* const encodingName , const unsigned int blockSize , MemoryManager* const manager) : fBlockSize(blockSize) , fEncodingName(0) , fMemoryManager(manager) { fEncodingName = XMLString::replicate(encodingName, fMemoryManager); } // --------------------------------------------------------------------------- // XMLTranscoder: Protected helpers // --------------------------------------------------------------------------- void XMLTranscoder::checkBlockSize(const unsigned int) { // if (toCheck > fBlockSize) // ThrowXML(TranscodingException, XMLExcepts::Trans_BadBlockSize); } // --------------------------------------------------------------------------- // XMLLCPTranscoder: Public Destructor // --------------------------------------------------------------------------- XMLLCPTranscoder::XMLLCPTranscoder() { } // --------------------------------------------------------------------------- // XMLLCPTranscoder: Hidden Constructors // --------------------------------------------------------------------------- XMLLCPTranscoder::~XMLLCPTranscoder() { } XERCES_CPP_NAMESPACE_END