ttomcat-1778514358873.zip-extract/apache-tomcat-11.0.18-src/java/org/apache/jasper/compiler/EncodingDetector.java

Path
ttomcat-1778514358873.zip-extract/apache-tomcat-11.0.18-src/java/org/apache/jasper/compiler/EncodingDetector.java
Status
scanned
Type
file
Name
EncodingDetector.java
Extension
.java
Programming language
Java
Mime type
text/plain
File type
ASCII text, with CRLF line terminators
Tag

      
    
Rootfs path

      
    
Size
6970 (6.8 KB)
MD5
30fa15ef26858259cfaf0db07eee9621
SHA1
59fc9439d507221ab22b5a1a783c422ddf975939
SHA256
439da504a85cb6f42c9e01ddf55bce2cb37b59752a1769319a2c66458b4ddd99
SHA512

      
    
SHA1_git
d0855acacf852e5de249947ec44ee0ac696d0d02
Is binary

      
    
Is text
True
Is archive

      
    
Is media

      
    
Is legal

      
    
Is manifest

      
    
Is readme

      
    
Is top level

      
    
Is key file

      
    
EncodingDetector.java | 6.8 KB |

/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jasper.compiler; import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import javax.xml.stream.XMLInputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamReader; /* * The BoM detection is derived from: * https://svn.us.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java?annotate=1742248 * * The prolog is always at least as specific as the BOM therefore any encoding * specified in the prolog should take priority over the BOM. */ class EncodingDetector { private static final XMLInputFactory XML_INPUT_FACTORY; static { ClassLoader oldCl = Thread.currentThread().getContextClassLoader(); try { Thread.currentThread().setContextClassLoader(EncodingDetector.class.getClassLoader()); XML_INPUT_FACTORY = XMLInputFactory.newFactory(); } finally { if (oldCl != null) { Thread.currentThread().setContextClassLoader(oldCl); } } } private final String encoding; private final int skip; private final boolean encodingSpecifiedInProlog; EncodingDetector(BufferedInputStream bis) throws IOException { // Buffer is 1k. BOM is only 4 bytes. bis.mark(4); BomResult bomResult = processBom(bis); // Reset the stream back to the start to allow the XML prolog detection to work. Skip any BoM we discovered. bis.reset(); for (int i = 0; i < bomResult.skip; i++) { bis.read(); } String prologEncoding = getPrologEncoding(bis); if (prologEncoding == null) { encodingSpecifiedInProlog = false; encoding = bomResult.encoding; } else { encodingSpecifiedInProlog = true; encoding = prologEncoding; } skip = bomResult.skip; } String getEncoding() { return encoding; } int getSkip() { return skip; } boolean isEncodingSpecifiedInProlog() { return encodingSpecifiedInProlog; } private String getPrologEncoding(InputStream stream) { String encoding = null; try { XMLStreamReader xmlStreamReader = XML_INPUT_FACTORY.createXMLStreamReader(stream); encoding = xmlStreamReader.getCharacterEncodingScheme(); } catch (XMLStreamException e) { // Ignore } return encoding; } private BomResult processBom(InputStream stream) { // Read first four bytes (or as many are available) and determine encoding try { final byte[] b4 = new byte[4]; int count = 0; int singleByteRead; while (count < 4) { singleByteRead = stream.read(); if (singleByteRead == -1) { break; } b4[count] = (byte) singleByteRead; count++; } return parseBom(b4, count); } catch (IOException ioe) { // Failed. return new BomResult("UTF-8", 0); } } private BomResult parseBom(byte[] b4, int count) { if (count < 2) { return new BomResult("UTF-8", 0); } // UTF-16, with BOM int b0 = b4[0] & 0xFF; int b1 = b4[1] & 0xFF; if (b0 == 0xFE && b1 == 0xFF) { // UTF-16, big-endian return new BomResult("UTF-16BE", 2); } if (b0 == 0xFF && b1 == 0xFE) { // UTF-16, little-endian return new BomResult("UTF-16LE", 2); } // default to UTF-8 if we don't have enough bytes to make a good determination of the encoding if (count < 3) { return new BomResult("UTF-8", 0); } // UTF-8 with a BOM int b2 = b4[2] & 0xFF; if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) { return new BomResult("UTF-8", 3); } // default to UTF-8 if we don't have enough bytes to make a good determination of the encoding if (count < 4) { return new BomResult("UTF-8", 0); } // Other encodings. No BOM. Try and ID encoding. int b3 = b4[3] & 0xFF; if (b0 == 0x00 && b1 == 0x00 && b2 == 0x00 && b3 == 0x3C) { // UCS-4, big endian (1234) return new BomResult("ISO-10646-UCS-4", 0); } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x00 && b3 == 0x00) { // UCS-4, little endian (4321) return new BomResult("ISO-10646-UCS-4", 0); } if (b0 == 0x00 && b1 == 0x00 && b2 == 0x3C && b3 == 0x00) { // UCS-4, unusual octet order (2143) // REVISIT: What should this be? return new BomResult("ISO-10646-UCS-4", 0); } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x00) { // UCS-4, unusual octet order (3412) // REVISIT: What should this be? return new BomResult("ISO-10646-UCS-4", 0); } if (b0 == 0x00 && b1 == 0x3C && b2 == 0x00 && b3 == 0x3F) { // UTF-16, big-endian, no BOM // (or could turn out to be UCS-2... // REVISIT: What should this be? return new BomResult("UTF-16BE", 0); } if (b0 == 0x3C && b1 == 0x00 && b2 == 0x3F && b3 == 0x00) { // UTF-16, little-endian, no BOM // (or could turn out to be UCS-2... return new BomResult("UTF-16LE", 0); } if (b0 == 0x4C && b1 == 0x6F && b2 == 0xA7 && b3 == 0x94) { // EBCDIC // a la xerces1, return CP037 instead of EBCDIC here return new BomResult("CP037", 0); } // default encoding return new BomResult("UTF-8", 0); } private record BomResult(String encoding, int skip) { } }
Detected license expression
apache-2.0
Detected license expression (SPDX)
Apache-2.0
Percentage of license text
15.66
Copyrights

      
    
Holders

      
    
Authors

      
    
License detections License expression License expression SPDX
apache_2_0-4bde3f57-78aa-4201-96bf-531cba09e7de apache-2.0 Apache-2.0
URL Start line End line
http://www.apache.org/licenses/LICENSE-2.0 9 9
https://svn.us.apache.org/viewvc/tomcat/trunk/java/org/apache/jasper/xmlparser/XMLEncodingDetector.java?annotate=1742248 29 29