001 /* $Id: ExtendedBaseRules.java 992060 2010-09-02 19:09:47Z simonetripodi $
002 *
003 * Licensed to the Apache Software Foundation (ASF) under one or more
004 * contributor license agreements. See the NOTICE file distributed with
005 * this work for additional information regarding copyright ownership.
006 * The ASF licenses this file to You under the Apache License, Version 2.0
007 * (the "License"); you may not use this file except in compliance with
008 * the License. You may obtain a copy of the License at
009 *
010 * http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018
019
020 package org.apache.commons.digester;
021
022
023 import java.util.ArrayList;
024 import java.util.Collections;
025 import java.util.Comparator;
026 import java.util.HashMap;
027 import java.util.Iterator;
028 import java.util.List;
029 import java.util.Map;
030
031
032 /**
033 * <p>Extension of {@link RulesBase} for complex schema.</p>
034 *
035 * <p>This is an extension of the basic pattern matching scheme
036 * intended to improve support for mapping complex xml-schema.
037 * It is intended to be a minimal extension of the standard rules
038 * big enough to support complex schema but without the full generality
039 * offered by more exotic matching pattern rules.</p>
040 *
041 * <h4>When should you use this rather than the original?</h4>
042 *
043 * <p>
044 * This pattern-matching engine is complex and slower than the basic
045 * default RulesBase class, but offers more functionality:
046 * <ul>
047 * <li>Universal patterns allow patterns to be specified which will match
048 * regardless of whether there are "better matching" patterns available.</li>
049 * <li>Parent-match patterns (eg "a/b/?") allow matching for all direct
050 * children of a specified element.</li>
051 * <li>Ancestor-match patterns (eg "a/b/*") allow matching all elements
052 * nested within a specified element to any nesting depth.</li>
053 * <li>Completely-wild patterns ("*" or "!*") allow matching all elements.</li>
054 * </ul>
055 * </p>
056 *
057 * <h4>Universal Match Patterns</h4>
058 *
059 * <p>The default RulesBase pattern-matching engine always attempts to find
060 * the "best matching pattern", and will ignore rules associated with other
061 * patterns that match but are not "as good". As an example, if the pattern
062 * "a/b/c" is associated with rules 1 and 2, and "*/c" is associated with
063 * rules 3 and 4 then element "a/b/c" will cause only rules 1 and 2 to execute.
064 * Rules 3 and 4 do have matching patterns, but because the patterns are shorter
065 * and include wildcard characters they are regarded as being "not as good" as
066 * a direct match. In general, exact patterns are better than wildcard patterns,
067 * and among multiple patterns with wildcards, the longest is preferred.
068 * See the RulesBase class for more information.</p>
069 *
070 * <p>This feature of preferring "better" patterns can be a powerful tool.
071 * However it also means that patterns can interact in unexpected ways.</p>
072 *
073 * <p>When using the ExtendedBaseRules, any pattern prefixed with '!' bypasses
074 * the "best match" feature. Even if there is an exact match or a longer
075 * wildcard match, patterns prefixed by '!' will still be tested to see if
076 * they match, and if so their associated Rule objects will be included in
077 * the set of rules to be executed in the normal manner.</p>
078 *
079 * <ul>
080 * <li>Pattern <code>"!*/a/b"</code> matches whenever an 'b' element
081 * is inside an 'a'.</li>
082 * <li>Pattern <code>"!a/b/?"</code> matches any child of a parent
083 * matching <code>"a/b"</code> (see "Parent Match Patterns").</li>
084 * <li>Pattern <code>"!*/a/b/?"</code> matches any child of a parent
085 * matching <code>"!*/a/b"</code> (see "Parent Match Patterns").</li>
086 * <li>Pattern <code>"!a/b/*"</code> matches any element whose path
087 * starts with "a" then "b" (see "Ancestor Match Patterns").</li>
088 * <li>Pattern <code>"!*/a/b/*"</code> matches any elements whose path
089 * contains 'a/b' (see "Ancestor Match Patterns").</li>
090 * </ul>
091 *
092 * <h4>Parent Match Patterns</h4>
093 *
094 * <p>
095 * These will match direct child elements of a particular parent element.
096 * <ul>
097 * <li>
098 * <code>"a/b/c/?"</code> matches any child whose parent matches
099 * <code>"a/b/c"</code>. Exact parent rules take precedence over Ancestor
100 * Match patterns.
101 * </li>
102 * <li>
103 * <code>"*/a/b/c/?"</code> matches any child whose parent matches
104 * <code>"*/a/b/c"</code>. The longest matching still applies to parent
105 * matches but the length excludes the '?', which effectively means
106 * that standard wildcard matches with the same level of depth are
107 * chosen in preference.
108 * </li>
109 * </ul>
110 * </p>
111 *
112 * <h4>Ancestor Match Patterns</h4>
113 *
114 * <p>
115 * These will match elements whose parentage includes a particular sequence
116 * of elements.
117 * <ul>
118 * <li>
119 * <code>"a/b/*"</code> matches any element whose path starts with
120 * 'a' then 'b'. Exact parent and parent match rules take precedence.
121 * The longest ancestor match will take precedence.
122 * </li>
123 * <li>
124 * <code>"*/a/b/*"</code> matches any elements whose path contains
125 * an element 'a' followed by an element 'b'. The longest matching still
126 * applies but the length excludes the '*' at the end.
127 * </li>
128 * </ul>
129 * </p>
130 *
131 * <h4>Completely Wild Patterns</h4>
132 *
133 * <p>Pattern <code>"*"</code> matches every pattern that isn't matched by
134 * any other basic rule.</p>
135 *
136 * <p>Pattern <code>"!*"</code> matches every pattern.</p>
137 *
138 * <h4>Using The Extended Rules</h4>
139 *
140 * <p>By default, a Digester instance uses a {@link RulesBase} instance as
141 * its pattern matching engine. To use an ExtendedBaseRules instance, call
142 * the Digester.setRules method before adding any Rule objects to the digester
143 * instance:
144 * <pre>
145 * Digester digester = new Digester();
146 * digester.setRules( new ExtendedBaseRules() );
147 * </pre></p>
148 *
149 * <p>The most important thing to remember when using the extended rules is
150 * that universal and non-universal patterns are completely independent.
151 * Universal patterns are never affected by the addition of new patterns
152 * or the removal of existing ones. Non-universal patterns are never affected
153 * by the addition of new <em>universal</em> patterns or the removal of
154 * existing <em>universal</em> patterns. As in the basic matching rules,
155 * non-universal (basic) patterns <strong>can</strong> be affected by the
156 * addition of new <em>non-universal</em> patterns or the removal of existing
157 * <em>non-universal</em> patterns, because only rules associated with the
158 * "best matching" pattern for each xml element are executed.
159 *
160 * <p> This means that you can use universal patterns to build up the simple
161 * parts of your structure - for example defining universal creation and
162 * property setting rules. More sophisticated and complex mapping will require
163 * non-universal patterns and this might mean that some of the universal rules
164 * will need to be replaced by a series of special cases using non-universal
165 * rules. But by using universal rules as your backbone, these additions
166 * should not break your existing rules.</p>
167 */
168
169
170 public class ExtendedBaseRules extends RulesBase {
171
172
173 // ----------------------------------------------------- Instance Variables
174
175 /**
176 * Counts the entry number for the rules.
177 */
178 private int counter = 0;
179
180
181 /**
182 * The decision algorithm used (unfortunately) doesn't preserve the entry
183 * order.
184 * This map is used by a comparator which orders the list of matches
185 * before it's returned.
186 * This map stores the entry number keyed by the rule.
187 */
188 private Map<Rule, Integer> order = new HashMap<Rule, Integer>();
189
190
191 // --------------------------------------------------------- Public Methods
192
193
194 /**
195 * Register a new Rule instance matching the specified pattern.
196 *
197 * @param pattern Nesting pattern to be matched for this Rule
198 * @param rule Rule instance to be registered
199 */
200 @Override
201 public void add(String pattern, Rule rule) {
202 super.add(pattern, rule);
203 counter++;
204 order.put(rule, counter);
205 }
206
207
208 /**
209 * Return a List of all registered Rule instances that match the specified
210 * nesting pattern, or a zero-length List if there are no matches. If more
211 * than one Rule instance matches, they <strong>must</strong> be returned
212 * in the order originally registered through the <code>add()</code>
213 * method.
214 *
215 * @param pattern Nesting pattern to be matched
216 */
217 @Override
218 public List<Rule> match(String namespace, String pattern) {
219 // calculate the pattern of the parent
220 // (if the element has one)
221 String parentPattern = "";
222 int lastIndex = pattern.lastIndexOf('/');
223
224 boolean hasParent = true;
225 if (lastIndex == -1) {
226 // element has no parent
227 hasParent = false;
228
229 } else {
230 // calculate the pattern of the parent
231 parentPattern = pattern.substring(0, lastIndex);
232
233 }
234
235
236 // we keep the list of universal matches separate
237 List<Rule> universalList = new ArrayList<Rule>(counter);
238
239 // Universal all wildards ('!*')
240 // These are always matched so always add them
241 List<Rule> tempList = this.cache.get("!*");
242 if (tempList != null) {
243 universalList.addAll(tempList);
244 }
245
246 // Universal exact parent match
247 // need to get this now since only wildcards are considered later
248 tempList = this.cache.get("!" + parentPattern + "/?");
249 if (tempList != null) {
250 universalList.addAll(tempList);
251 }
252
253
254 // base behaviour means that if we certain matches, we don't continue
255 // but we just have a single combined loop and so we have to set
256 // a variable
257 boolean ignoreBasicMatches = false;
258
259
260 // see if we have an exact basic pattern match
261 List<Rule> rulesList = this.cache.get(pattern);
262 if (rulesList != null) {
263 // we have a match!
264 // so ignore all basic matches from now on
265 ignoreBasicMatches = true;
266
267 } else {
268
269 // see if we have an exact child match
270 if (hasParent) {
271 // matching children takes preference
272 rulesList = this.cache.get(parentPattern + "/?");
273 if (rulesList != null) {
274 // we have a match!
275 // so ignore all basic matches from now on
276 ignoreBasicMatches = true;
277
278 } else {
279 // we don't have a match yet - so try exact ancester
280 //
281 rulesList = findExactAncesterMatch(pattern);
282 if (rulesList != null) {
283 // we have a match!
284 // so ignore all basic matches from now on
285 ignoreBasicMatches = true;
286 }
287 }
288 }
289 }
290
291
292 // OK - we're ready for the big loop!
293 // Unlike the basic rules case,
294 // we have to go through for all those universal rules in all cases.
295
296 // Find the longest key, ie more discriminant
297 String longKey = "";
298 int longKeyLength = 0;
299
300 for (String key : this.cache.keySet()) {
301
302 // find out if it's a univeral pattern
303 // set a flag
304 boolean isUniversal = key.startsWith("!");
305 if (isUniversal) {
306 // and find the underlying key
307 key = key.substring(1, key.length());
308 }
309
310
311 // don't need to check exact matches
312 boolean wildcardMatchStart = key.startsWith("*/");
313 boolean wildcardMatchEnd = key.endsWith("/*");
314 if (wildcardMatchStart || (isUniversal && wildcardMatchEnd)) {
315
316 boolean parentMatched = false;
317 boolean basicMatched = false;
318 boolean ancesterMatched = false;
319
320 boolean parentMatchEnd = key.endsWith("/?");
321 if (parentMatchEnd) {
322 // try for a parent match
323 parentMatched = parentMatch(key, pattern, parentPattern);
324
325 } else if (wildcardMatchEnd) {
326 // check for ancester match
327 if (wildcardMatchStart) {
328 String patternBody = key.substring(2, key.length() - 2);
329 if (pattern.endsWith(patternBody)) {
330 ancesterMatched = true;
331 } else {
332 ancesterMatched = (pattern.indexOf(patternBody + "/") > -1);
333 }
334 } else {
335 String bodyPattern = key.substring(0, key.length() - 2);
336 if (pattern.startsWith(bodyPattern))
337 {
338 if (pattern.length() == bodyPattern.length()) {
339 // exact match
340 ancesterMatched = true;
341 } else {
342 ancesterMatched = ( pattern.charAt(bodyPattern.length()) == '/' );
343 }
344 } else {
345 ancesterMatched = false;
346 }
347 }
348 } else {
349 // try for a base match
350 basicMatched = basicMatch(key, pattern);
351 }
352
353 if (parentMatched || basicMatched || ancesterMatched) {
354 if (isUniversal) {
355 // universal rules go straight in
356 // (no longest matching rule)
357 tempList = this.cache.get("!" + key);
358 if (tempList != null) {
359 universalList.addAll(tempList);
360 }
361
362 } else {
363 if (!ignoreBasicMatches) {
364 // ensure that all parent matches are SHORTER
365 // than rules with same level of matching.
366 //
367 // the calculations below don't work for universal
368 // matching, but we don't care because in that case
369 // this if-stmt is not entered.
370 int keyLength = key.length();
371 if (wildcardMatchStart) {
372 --keyLength;
373 }
374 if (wildcardMatchEnd) {
375 --keyLength;
376 } else if (parentMatchEnd) {
377 --keyLength;
378 }
379
380 if (keyLength > longKeyLength) {
381 rulesList = this.cache.get(key);
382 longKey = key;
383 longKeyLength = keyLength;
384 }
385 }
386 }
387 }
388 }
389 }
390
391
392 // '*' works in practice as a default matching
393 // (this is because anything is a deeper match!)
394 if (rulesList == null) {
395 rulesList = this.cache.get("*");
396 }
397
398 // if we've matched a basic pattern, then add to the universal list
399 if (rulesList != null) {
400 universalList.addAll(rulesList);
401 }
402
403
404 // don't filter if namespace is null
405 if (namespace != null) {
406 // remove invalid namespaces
407 Iterator<Rule> it = universalList.iterator();
408 while (it.hasNext()) {
409 Rule rule = it.next();
410 String ns_uri = rule.getNamespaceURI();
411 if (ns_uri != null && !ns_uri.equals(namespace)) {
412 it.remove();
413 }
414 }
415 }
416
417
418 // need to make sure that the collection is sort in the order
419 // of addition. We use a custom comparator for this
420 Collections.sort(
421 universalList,
422 new Comparator<Rule>() {
423
424 public int compare(Rule r1, Rule r2) throws ClassCastException {
425 // Get the entry order from the map
426 Integer i1 = order.get(r1);
427 Integer i2 = order.get(r2);
428
429 // and use that to perform the comparison
430 if (i1 == null) {
431 if (i2 == null) {
432
433 return 0;
434
435 } else {
436
437 return -1;
438
439 }
440 } else if (i2 == null) {
441 return 1;
442 }
443
444 return (i1.intValue() - i2.intValue());
445 }
446 });
447
448 return universalList;
449 }
450
451 /**
452 * Matching parent.
453 */
454 private boolean parentMatch(String key, String pattern, String parentPattern) {
455 return parentPattern.endsWith(key.substring(1, key.length() - 2));
456 }
457
458 /**
459 * Standard match.
460 * Matches the end of the pattern to the key.
461 */
462 private boolean basicMatch(String key, String pattern) {
463 return (pattern.equals(key.substring(2)) ||
464 pattern.endsWith(key.substring(1)));
465 }
466
467 /**
468 * Finds an exact ancester match for given pattern
469 */
470 private List<Rule> findExactAncesterMatch(String parentPattern) {
471 List<Rule> matchingRules = null;
472 int lastIndex = parentPattern.length();
473 while (lastIndex-- > 0) {
474 lastIndex = parentPattern.lastIndexOf('/', lastIndex);
475 if (lastIndex > 0) {
476 matchingRules = this.cache.get(parentPattern.substring(0, lastIndex) + "/*");
477 if (matchingRules != null) {
478 return matchingRules;
479 }
480 }
481 }
482 return null;
483 }
484 }