1
2 package org.textensor.xml;
3
4 import org.textensor.report.E;
5
6
7
8 public class XMLChecker {
9
10
11
12 public static void checkXML(String s, boolean bshow) {
13 long starttime = System.currentTimeMillis();
14 XMLTokenizer tkz = new XMLTokenizer(s);
15 int nerror = 0;
16 int nread = 0;
17
18 while (true) {
19 XMLToken xmlt = tkz.nextToken();
20 if (bshow) {
21 System.out.println("item " + nread + " " + xmlt);
22 }
23 nread++;
24 if (xmlt.isNone()) {
25 break;
26 }
27 }
28 long endtime = System.currentTimeMillis();
29
30 System.out.println(" Total tags: " + nread + "\n total errors: " + nerror +
31 "\n tokenizing took " + (int)(endtime - starttime) + " ms");
32 }
33
34
35
36
37
38
39
40
41 public static String deGarbage(String s) {
42 if (s.startsWith("<")) {
43
44
45 } else {
46 int iob = s.indexOf("<");
47
48 if (iob > 0) {
49 String junk = s.substring(0, iob);
50 if (junk.trim().length() > 0) {
51
52 System.out.println("WARNING - garbage at start of xml file - first < is at " +
53 iob + " preceded by ---" + junk + "---");
54 }
55 s = s.substring(iob, s.length());
56
57 } else {
58 E.error(" - xml file contains no xml " + s);
59 s = null;
60 }
61 }
62
63 return s;
64 }
65
66
67
68
69 }
70
71