users@jaxb.java.net

Using EntityResolver to resolve schemas when unmarshalling with validation.

From: P Orrifolius <porrifolius_at_gmail.com>
Date: Sat, 7 Feb 2009 14:38:43 +1300

Hello.


I'm having difficulty performing local resolution of schemas when
unmarshalling with validation. I've got everything working if I'm
happy with JAXB going over the network to retrieve schemas, but I need
to use local copies of the xsds from the classpath and prevent the use
of any other schemas. I think that using an EntityResolver is the
answer to this but I can't get it to work.


I'm trying to unmarshal xml like the following:

<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<!DOCTYPE example [
    <!ENTITY copy "&#xA9;">
    <!ENTITY blah "&#xAB;">
]>
<blah xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xmlns:s1="http://schemas.blah.com/schema1.xsd"
      xmlns="http://schemas.blah.com/schema1.xsd"
      xsi:schemaLocation="http://schemas.blah.com/schema1.xsd
http://schemas.blah.com/schema1.xsd">
  <zob>
    <s1:foo xmlns="http://schemas.blah.com/schema2.xsd"
            xsi:schemaLocation="http://schemas.blah.com/schema2.xsd
http://schemas.blah.com/schema2.xsd">
      <bar>Blah de blah &blah; &copy;</bar>
    </s1:foo>
  </zob>
  <baz>
    <boz xmlns="http://elsewhere.com/someschema.xsd"
         xsi:schemaLocation="http://elsewhere.com/someschema.xsd
http://elsewhere.com/someschema.xsd">
      <randomTag><otherRandomTag/></randomTag>
    </boz>
  </baz>
</blah>


<baz>'s content is an anyType and the content is just unmarshalled as
a DOM node... it shouldn't get validated as the source can put
whatever it likes in there.


The code that seems to work alright but hits the network for schemas
is as follows:

public class Test {
  private final Unmarshaller unmarshaller;

  public Test()
      throws TransformerConfigurationException,
ParserConfigurationException, JAXBException, SAXException {
    JAXBContext jaxbContext = JAXBContext.newInstance("com.blah.generated");

    unmarshaller = jaxbContext.createUnmarshaller();
    SchemaFactory schemaFactory =
SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
    InputStream sis1 = Test.class.getResourceAsStream("/com/blah/schema1.xsd");
    StreamSource ss1 = new StreamSource(sis1,
"http://schemas.blah.com/schema1.xsd");
    InputStream sis2 = Test.class.getResourceAsStream("/com/blah/schema2.xsd");
    StreamSource ss2 = new StreamSource(sis2,
"http://schemas.blah.com/schema2.xsd");
    Source[] sources = new Source[] {
        ss1,
        ss2
      };
    Schema schema = schemaFactory.newSchema(sources);
    unmarshaller.setSchema(schema);
  }

  public Blah unmarshalBlah(URL url)
          throws JAXBException {
      @SuppressWarnings("unchecked") JAXBElement<Blah> el =
(JAXBElement<Blah>) unmarshaller.unmarshal(url);
      return el.getValue();
  }

  public static void main(String[] args) throws MalformedURLException,
JAXBException, TransformerConfigurationException, SAXException,
ParserConfigurationException {
    URL url = new URL("file:///home/porrifolius/blah.xml");
    Test test = new Test();
    Blah b = test.unmarshalBlah(url);
  }
}


The character entity references are substituted, the xml is validated
by schema1.xsd and schema2.xsd and the content of the <baz> element is
not validated. Interestingly it does _not_ load schema1.xsd from the
network, nor someschema.xsd. It only loads schema2.xsd from the
network (and the w3c xml schema).

Now I try to make sure that the xml doesn't reference any external
resources in the DTD, that the schema1.xsd and schema2.xsd are the
only schemas used and that they are loaded from the classpath.


If I use the following code then it tries to load someschema.xsd which
is not allowed. If I comment out the xmlReader.setFeature that asks
for validation then no schemas are loaded by the EntityResolver but it
also lets invalid xml through.

public class Test {
    private final Unmarshaller unmarshaller;
    private final XMLReader xmlReader;


    public Test()
            throws TransformerConfigurationException,
ParserConfigurationException, JAXBException, SAXException {
        JAXBContext jaxbContext = JAXBContext.newInstance("com.blah.generated");

        unmarshaller = jaxbContext.createUnmarshaller();

        SAXParserFactory parserFactory = SAXParserFactory.newInstance();
        parserFactory.setNamespaceAware(true);
        SAXParser saxParser = parserFactory.newSAXParser();
        xmlReader = saxParser.getXMLReader();
        xmlReader.setFeature("http://apache.org/xml/features/validation/schema",
true);
        xmlReader.setEntityResolver(new EntityResolver() {
            public InputSource resolveEntity(String publicId, String systemId) {
                System.out.println("Entity resolving:" + publicId + "
    " + systemId);
                if ("http://schemas.blah.com/schema1.xsd".equals(systemId)) {
                    System.out.println("... loading from classpath");
                    return new
InputSource(Test.class.getResourceAsStream("/com/blah/schema1.xsd"));
                }
                else if
("http://schemas.blah.com/schema2.xsd".equals(systemId)) {
                    System.out.println("... loading from classpath");
                    return new
InputSource(Test.class.getResourceAsStream("/com/blah/schema2.xsd"));
                }
                else if ("http://www.w3.org/2001/xml.xsd".equals(systemId)) {
                    System.out.println("... loading from classpath");
                    return new
InputSource(Test.class.getResourceAsStream("/xml.xsd"));
                }
                else {
                    throw new RuntimeException("don't want to load any
other schemas");
                }
            }
        });
    }

    public Blah unmarshalBlah(URL url)
            throws JAXBException {
        try {
            SAXSource saxSource = new SAXSource(xmlReader, new
InputSource(url.openStream()));
            @SuppressWarnings("unchecked") JAXBElement<Blah> el =
(JAXBElement<Blah>) unmarshaller.unmarshal(saxSource);
            return el.getValue();
        }
        catch (IOException e) {
            e.printStackTrace();
            return null;
        }
    }

    public static void main(String[] args) throws
MalformedURLException, JAXBException,
TransformerConfigurationException, SAXException,
ParserConfigurationException {
        URL url = new URL("file:///home/porrifolius/blah.xml");
        Test test = new Test();
        Blah b = test.unmarshalBlah(url);
    }
}


Oh, and if I continue to create and set the Schema on the unmarshaller
as in the first Test class then the EntityResolver doesn't get called
at all and it still loads schema2.xsd from the network.


Does anybody have any advice on how to get this working? Or links to
some documentation that sets out how to do it?

Thanks very much
P