dtd2xsd.pl 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836
  1. #! perl
  2. #
  3. # by Dan Connolly http://www.w3.org/People/Connolly/ connolly@w3.org
  4. # Bert Bos http://www.w3.org/People/Bos/ <bert@w3.org>
  5. # Yuichi Koike
  6. # Mary Holstege (holstege@mathling.com)
  7. # initial hack by DC Apr 2000, based on dtd2bnf by BB Mar 1998;
  8. # major revision to Apr 2000 make it actually usable by YK;
  9. # tweaks by DC; major update Jan 2001 by MH
  10. #
  11. # see Log since then at end.
  12. # $Id: dtd2xsd.pl,v 1.17 2001/01/19 05:59:12 connolly Exp $
  13. use strict;
  14. # Handling command line argument
  15. my $targetNS = "http://www.w3.org/namespace/";
  16. my $prefix = "t";
  17. my $alias = 0;
  18. my $file = "";
  19. my %SimpleTypes;
  20. my @AttrGroupPatterns;
  21. my @ModelGroupPatterns;
  22. my @SubstitutionGroupPatterns;
  23. my %SubstitutionGroup;
  24. my %Mixed;
  25. my %ModelGroup;
  26. my $mapping_file;
  27. my $pcdata_flag = 0;
  28. my $pcdata_simpletype = "string";
  29. my $debug = 0;
  30. while ($#ARGV >= 0) {
  31. my $para = shift(@ARGV);
  32. if ($para eq "-ns") {
  33. $targetNS = shift(@ARGV);
  34. } elsif ($para eq "-prefix") {
  35. $prefix = shift(@ARGV);
  36. } elsif ($para eq "-alias") {
  37. $alias = 1;
  38. } elsif ($para eq "-pcdata") {
  39. # Treat #PCDATA by itself as being string (or other simple type
  40. # if so designated in the mapping file)
  41. $pcdata_flag = 1;
  42. } elsif ($para eq "-mapfile") {
  43. $mapping_file = shift(@ARGV);
  44. } elsif ($para eq "-simpletype") {
  45. my($pat) = shift(@ARGV);
  46. my($b) = shift(@ARGV);
  47. $SimpleTypes{$pat} = $b;
  48. } elsif ($para eq "-attrgroup") {
  49. push(@AttrGroupPatterns, shift(@ARGV));
  50. } elsif ($para eq "-modelgroup") {
  51. push(@ModelGroupPatterns, shift(@ARGV));
  52. } elsif ($para eq "-substgroup") {
  53. push(@SubstitutionGroupPatterns, shift(@ARGV));
  54. } elsif ($para eq "-debug") {
  55. $debug = 1;
  56. } else {
  57. $file = $para;
  58. }
  59. }
  60. # Alias dictionary: defaults
  61. my %alias_dic;
  62. $alias_dic{"URI"} = "uriReference";
  63. $alias_dic{"LANG"} = "language";
  64. $alias_dic{"NUMBER"} = "nonNegativeInteger";
  65. $alias_dic{"Date"} = "date";
  66. $alias_dic{"Boolean"} = "boolean";
  67. if ( $mapping_file )
  68. {
  69. print STDERR "Open mapping $mapping_file ";
  70. if ( !open( MAPPINGS, "<$mapping_file" ) )
  71. {
  72. print STDERR "unsuccessful.\n";
  73. }
  74. else {
  75. print STDERR "successful.\n";
  76. while ( <MAPPINGS> ) {
  77. chop;
  78. if ( /^alias\s+([^ \t]+)\s*=\s*([^ \t]+)\s*/i ) {
  79. $alias_dic{$1} = $2;
  80. }
  81. elsif ( /^simpletype\s+([^ \t]+)\s*=\s*([^ \t]+)\s*/i ) {
  82. $SimpleTypes{$1} = $2;
  83. }
  84. elsif ( /^attrgroup\s+([^ \t]+)\s*/i ) {
  85. push( @AttrGroupPatterns, $1 );
  86. }
  87. elsif ( /^modelgroup\s+([^ \t]+)\s*/i ) {
  88. push( @ModelGroupPatterns, $1 );
  89. }
  90. elsif ( /^substgroup\s+([^ \t]+)\s*/i ) {
  91. push( @SubstitutionGroupPatterns, $1 );
  92. }
  93. elsif ( /^pcdata\s+([^ \t]+)\s*/i ) {
  94. ## BUGLET: doesn't pay attention to prefix; just a special alias
  95. $pcdata_simpletype = $1;
  96. }
  97. }
  98. }
  99. foreach my $key (keys(%alias_dic))
  100. {
  101. print STDERR "Alias \%$key to $alias_dic{$key}\n"
  102. }
  103. }
  104. # Variable declaration
  105. my $linelen = 72;
  106. my $PROG = substr($0, rindex($0, "/") + 1);
  107. my $USAGE = "Usage: $PROG file\n";
  108. my $str = "(?:\"([^\"]*)\"|\'([^\']*)\')";
  109. my %pent; # Parameter entities
  110. my %attributes; # Attribute lists
  111. my @element; # Elements in source order
  112. my %model; # Content models
  113. # Main
  114. $/ = undef;
  115. # Open file, remove comment and include external entity
  116. my $buf = openFile($file);
  117. # Alias treatment
  118. my $alias_ident = "_alias_";
  119. if ($alias eq 1) {
  120. foreach my $key (keys(%alias_dic)) {
  121. my $aliaskey = sprintf("%s%s%s", $alias_ident, $key, $alias_ident);
  122. $buf =~ s/\%$key;/$aliaskey/gsie;
  123. }
  124. }
  125. # store all parameter entities
  126. while ($buf =~ s/<!ENTITY\s+%\s+(\S+)\s+$str\s*>//sie) {
  127. my($n, $repltext) = ($1, $2.$3);
  128. my ($pat);
  129. next if $pent{$n}; # only the first declaration of an entity counts
  130. foreach $pat (keys %SimpleTypes){
  131. if ($n =~ /^$pat$/){
  132. $buf .= " <!_DATATYPE $n $SimpleTypes{$pat} $repltext> ";
  133. $pent{$n} = "#DATATYPEREF $n";
  134. undef $n;
  135. last;
  136. }
  137. }
  138. foreach $pat (@AttrGroupPatterns){
  139. if ($n =~ /^$pat$/){
  140. $buf .= " <!_ATTRGROUP $n $repltext> ";
  141. $pent{$n} = "#ATTRGROUPREF $n";
  142. undef $n;
  143. last;
  144. }
  145. }
  146. foreach $pat (@ModelGroupPatterns){
  147. if ($n =~ /^$pat$/){
  148. $buf .= " <!_MODELGROUP $n $repltext> ";
  149. $pent{$n} = "#MODELGROUPREF $n";
  150. undef $n;
  151. last;
  152. }
  153. }
  154. foreach $pat (@SubstitutionGroupPatterns){
  155. if ($n =~ /^$pat$/){
  156. $buf .= " <!_SUBSTGROUP $n $repltext> ";
  157. $pent{$n} = "#SUBSTGROUPREF $n";
  158. undef $n;
  159. last;
  160. }
  161. }
  162. $pent{$n}=$repltext if $n;
  163. }
  164. # remove all general entities
  165. $buf =~ s/<!ENTITY\s+.*?>//gsie;
  166. # loop until parameter entities fully expanded
  167. my $i;
  168. do {
  169. # count # of substitutions
  170. $i = 0;
  171. # expand parameter entities
  172. $buf =~ s/%([a-zA-Z0-9_\.-]+);?/$i++,$pent{$1}/gse;
  173. } while ($i != 0);
  174. # treat conditional sections
  175. while($buf =~ s/<!\[\s*?INCLUDE\s*?\[(.*)\]\]>/\1/gsie) {};
  176. while($buf =~ s/<!\[\s*?IGNORE\s*?\[.*\]\]>//gsie) {};
  177. # store attribute lists
  178. $buf =~ s/<!ATTLIST\s+(\S+)\s+(.*?)>/store_att($1, $2)/gsie;
  179. # store content models
  180. $buf =~ s/<!ELEMENT\s+(\S+)\s+(.+?)>/store_elt($1, $2)/gsie;
  181. #print "<?xml version='1.0'?>\n";
  182. print "<schema
  183. xmlns='http://www.w3.org/2000/10/XMLSchema'
  184. targetNamespace='$targetNS'
  185. xmlns:$prefix='$targetNS'>\n";
  186. # find maximum length of non-terminals
  187. #my $maxlen = max(map(length, @element)) + 4;
  188. # write simple type declarations
  189. $buf =~ s/<!_DATATYPE\s+(\S+)\s+(\S+)\s+(.+?)>/write_simpleType($1, $2, $3)/gsie;
  190. # write attribute groups
  191. $buf =~ s/<!_ATTRGROUP\s+(\S+)\s+(.+?)>/write_attrGroup($1, $2)/gsie;
  192. # write model groups
  193. $buf =~ s/<!_MODELGROUP\s+(\S+)\s+(.+?)>/write_modelGroup($1, $2)/gsie;
  194. # write subsitution groups
  195. $buf =~ s/<!_SUBSTGROUP\s+(\S+)\s+(.+?)>/write_substitutionGroup($1, $2)/gsie;
  196. my($e);
  197. # loop over elements, writing XML schema
  198. foreach $e (@element) {
  199. my $h = $model{$e};
  200. my $h2 = $attributes{$e};
  201. my @model = @$h;
  202. my $isSimple = ($pcdata_flag eq 1) && ($model[1] eq '#PCDATA') &&
  203. ( ($#model eq 2) ||
  204. ( ($#model eq 3) && ($model[3] eq '*') ) );
  205. my $substGroup = $SubstitutionGroup{$e};
  206. if ( $substGroup )
  207. {
  208. $substGroup = " substitutionGroup='$substGroup'";
  209. }
  210. # print rule for element $e
  211. if ( $isSimple && ! $h2 )
  212. {
  213. # Assume (#PCDATA) is string
  214. print "\n <element name='$e' type='$pcdata_simpletype'$substGroup>\n";
  215. }
  216. else {
  217. print "\n <element name='$e'$substGroup>\n";
  218. }
  219. if ( $isSimple )
  220. {
  221. # Assume (#PCDATA) is string
  222. if ( $h2 )
  223. {
  224. print " <complexType>\n";
  225. print " <simpleContent>\n";
  226. print " <extension base='string'>\n";
  227. }
  228. }
  229. else {
  230. # print rule for $e's content model
  231. print " <complexType";
  232. if ($model[0] eq 'EMPTY') {
  233. if (! $h2 ) {
  234. print "/>\n";
  235. } else {
  236. print ">\n";
  237. }
  238. }
  239. elsif ( $model[0] eq 'ANY' )
  240. {
  241. print ">\n";
  242. print " <sequence>\n";
  243. print " <any namespace='$targetNS'/>\n";
  244. print " </sequence>\n";
  245. }
  246. else {
  247. if ( $debug eq 1 ) {
  248. print STDERR "==mixed? @model\n"; #@@
  249. }
  250. if (&isMixed(@model)) {
  251. print " mixed='true'>\n";
  252. }
  253. else {
  254. print ">\n";
  255. }
  256. my @list = &makeChildList('', @model);
  257. &printChildList(3, @list);
  258. }
  259. }
  260. # print rule for $e's attributes
  261. if (! $h2) {
  262. # nothing
  263. } else {
  264. &printAttrDecls(@$h2);
  265. if ( $isSimple ) {
  266. print " </extension>\n";
  267. print " </simpleContent>\n";
  268. }
  269. }
  270. if ( !$h2 && $isSimple ) {
  271. # Do nothing
  272. }
  273. elsif ($h2 || $model[0] ne 'EMPTY') {
  274. print " </complexType>\n";
  275. }
  276. print " </element>\n";
  277. }
  278. print "</schema>\n";
  279. exit;
  280. sub printSpace
  281. {
  282. my ($num) = $_[0];
  283. for (my $i=0; $i<$num; $i++) {
  284. print " ";
  285. }
  286. }
  287. sub printChildList
  288. {
  289. my ($num, @list) = @_;
  290. my @currentTag = ();
  291. for (my $i=0; $i<= $#list; $i++) {
  292. my $n = $list[$i];
  293. if ($n eq 0 || $n eq 1 || $n eq 2 || $n eq 3) {
  294. if ( ($pcdata_flag eq 0) && ($n eq 0 || $n eq 1) && $list[$i+1] eq 20)
  295. {
  296. # The whole list is 0 20 or 1 20; i.e. (#PCDATA) or (#PCDATA)*.
  297. # Don't generate a sequence child; mixed handles all this.
  298. }
  299. else {
  300. # my $do_it_flag = 1;
  301. if ( $currentTag[$#currentTag] eq "" && $n eq 0 )
  302. {
  303. push(@currentTag, "");
  304. # my $n_1 = $list[$i+1];
  305. # if ( $n_1 eq 10 || $n_1 eq 11 || $n_1 eq 12 || $n_1 eq 13 )
  306. # {
  307. # # do nothing: we have a phantom sequence wrapping a choice
  308. # # that we want to not want to appear. OTOH we want a top
  309. # # level sequence in other cases.
  310. # $do_it_flag = 0;
  311. # }
  312. }
  313. # if ( $do_it_flag eq 1 )
  314. # {
  315. printSpace($num); $num++;
  316. print "<sequence";
  317. if ($n eq 1) {
  318. print " minOccurs='0' maxOccurs='unbounded'";
  319. } elsif ($n eq 2) {
  320. print " maxOccurs='unbounded'";
  321. } elsif ($n eq 3) {
  322. print " minOccurs='0' maxOccurs='1'";
  323. }
  324. print ">\n";
  325. push(@currentTag, "sequence");
  326. }
  327. #}
  328. } elsif ($n eq 10 || $n eq 11 || $n eq 12 || $n eq 13) {
  329. printSpace($num); $num++;
  330. print "<choice";
  331. if ($n eq 11) {
  332. print " minOccurs='0' maxOccurs='unbounded'";
  333. } elsif ($n eq 12) {
  334. print " maxOccurs='unbounded'";
  335. } elsif ($n eq 13) {
  336. print " minOccurs='0' maxOccurs='1'";
  337. }
  338. print ">\n";
  339. push(@currentTag, "choice");
  340. } elsif ($n eq 20) {
  341. my $tag = pop(@currentTag);
  342. if ($tag ne "") {
  343. $num--; printSpace($num);
  344. print "</", $tag, ">\n";
  345. }
  346. } else {
  347. printSpace($num);
  348. if ($n eq '#MODELGROUPREF') {
  349. print "<group ref='$prefix:$list[++$i]'";
  350. }
  351. elsif ($n eq '#SUBSTGROUPREF') {
  352. print "<element ref='$prefix:$list[++$i]'";
  353. } else {
  354. print "<element ref='$prefix:$n'";
  355. }
  356. if ($currentTag[$#currentTag] ne "choice") {
  357. if ($list[$i+1] eq "+") {
  358. print " maxOccurs='unbounded'";
  359. $i++;
  360. } elsif ($list[$i+1] eq "?") {
  361. print " minOccurs='0' maxOccurs='1'";
  362. $i++;
  363. } elsif ($list[$i+1] eq "*") {
  364. print " minOccurs='0' maxOccurs='unbounded'";
  365. $i++;
  366. }
  367. }
  368. print "/>\n";
  369. }
  370. }
  371. }
  372. sub makeChildList {
  373. my ($groupName, @model) = @_;
  374. my @ret = ();
  375. my @brace = ();
  376. for (my $i=0; $i<=$#model; $i++) {
  377. my $n = $model[$i];
  378. if ($n eq "(") {
  379. push(@ret, 0);
  380. push(@brace, $#ret);
  381. } elsif ($n eq ")") {
  382. if ($model[$i+1] eq "*") {
  383. $ret[$brace[$#brace]] += 1;
  384. $i++;
  385. } elsif ($model[$i+1] eq "+") {
  386. $ret[$brace[$#brace]] += 2;
  387. $i++;
  388. } elsif ($model[$i+1] eq "?") {
  389. $ret[$brace[$#brace]] += 3;
  390. $i++;
  391. }
  392. pop(@brace);
  393. push(@ret, 20);
  394. } elsif ($n eq ",") {
  395. $ret[$brace[$#brace]] = 0;
  396. } elsif ($n eq "|") {
  397. $ret[$brace[$#brace]] = 10;
  398. } elsif ($n eq "#PCDATA") {
  399. if ($model[$i+1] eq "|") {
  400. $i++;
  401. }
  402. if($groupName){
  403. $Mixed{$groupName} = 1;
  404. }
  405. } else {
  406. push(@ret, $n);
  407. }
  408. }
  409. # "( ( a | b | c )* )" gets mapped to "0 10 a b c 20 20" which will generate
  410. # a spurious sequence element. This is not too harmful when this is an
  411. # element content model, but with model groups it is incorrect.
  412. # In general we need to strip off 0 20 from the ends when it is redundant.
  413. # Redundant means: there is some other group that bounds the whole list.
  414. # Note that it gets a little tricky:
  415. # ( (a|b),(c|d) ) gets mapped to "0 10 a b 20 10 c d 20 20". If one
  416. # naively chops off the 0 and 20 on the groups that there is a 10 on one
  417. # end and a 20 on the other, one loses the bounding sequence, which is
  418. # required in this case.
  419. #
  420. if ( $ret[0] eq 0 && $ret[$#ret] eq 20 && $ret[$#ret-1] eq 20 &&
  421. ( $ret[1] eq 0 || $ret[1] eq 1 || $ret[1] eq 2 || $ret[1] eq 3 ||
  422. $ret[1] eq 10 || $ret[1] eq 11 || $ret[1] eq 12 || $ret[1] eq 13 )
  423. )
  424. {
  425. # OK, it is possible that the 0 20 is redundant. Now scan for balance:
  426. # All interim 20 between the proposed new start and the proposed new
  427. # final one should be at level 1 or above.
  428. my $depth = 0;
  429. my $redundant_paren = 1; # Assume redundant until proved otherwise
  430. for ( my $i = 1; $i <= $#ret-1; $i++ )
  431. {
  432. if ( $ret[$i] eq 20 )
  433. {
  434. $depth--;
  435. if ( $i < $#ret-1 && $depth < 1 )
  436. {
  437. $redundant_paren = 0;
  438. print STDERR "i=$i,depth=$depth\n";
  439. }
  440. }
  441. elsif ( $ret[$i] eq 0 ||
  442. $ret[$i] eq 1 ||
  443. $ret[$i] eq 2 ||
  444. $ret[$i] eq 3 ||
  445. $ret[$i] eq 10 ||
  446. $ret[$i] eq 11 ||
  447. $ret[$i] eq 12 ||
  448. $ret[$i] eq 13
  449. )
  450. {
  451. $depth++;
  452. }
  453. } # for
  454. if ( $redundant_paren eq 1 )
  455. {
  456. print STDERR "Truncating @ret\n";
  457. @ret = @ret[1..$#ret-1];
  458. }
  459. }
  460. if ( $debug eq 1 ) {
  461. print STDERR "@model to @ret\n";
  462. }
  463. return @ret;
  464. }
  465. sub printAttrDecls{
  466. my @atts = @_;
  467. for (my $i = 0; $i <= $#atts; $i++) {
  468. if ($atts[$i] eq '#ATTRGROUPREF'){
  469. print " <attributeGroup ref='$prefix:$atts[$i+1]'/>\n";
  470. $i ++;
  471. } else {
  472. # attribute name
  473. print " <attribute name='$atts[$i]'";
  474. # attribute type
  475. my @enume;
  476. $i++;
  477. if ($atts[$i] eq "(") {
  478. # like `attname ( yes | no ) #REQUIRED`
  479. $i++;
  480. while ($atts[$i] ne ")") {
  481. if ($atts[$i] ne "|") {
  482. push(@enume, $atts[$i]);
  483. }
  484. $i++;
  485. }
  486. } elsif ($atts[$i] eq '#DATATYPEREF'){
  487. print " type='$prefix:$atts[++$i]'";
  488. } elsif ($alias eq 1 && $atts[$i] =~ s/$alias_ident//gsie) {
  489. # alias special
  490. print " type='$alias_dic{$atts[$i]}'";
  491. } elsif ($atts[$i] =~ /ID|IDREF|ENTITY|NOTATION|IDREFS|ENTITIES|NMTOKEN|NMTOKENS/) {
  492. # common type for DTD and Schema
  493. print " type='$atts[$i]'";
  494. } else {
  495. # `attname CDATA #REQUIRED`
  496. print " type='string'";
  497. }
  498. $i++;
  499. # #FIXED
  500. if($atts[$i] eq "#FIXED") {
  501. $i++;
  502. print " use='fixed' value='$atts[$i]'/>\n";
  503. } else {
  504. # minOccurs
  505. if ($atts[$i] eq "#REQUIRED") {
  506. print " use='required'";
  507. } elsif ($atts[$i] eq "#IMPLIED") {
  508. print " use='optional'";
  509. } else {
  510. print " use='default' value='$atts[$i]'";
  511. }
  512. # enumerate
  513. if ($#enume eq -1) {
  514. print "/>\n";
  515. } else {
  516. print ">\n";
  517. print " <simpleType>\n";
  518. print " <restriction base='string'>\n";
  519. &write_enum(@enume);
  520. print " </restriction>\n";
  521. print " </simpleType>\n";
  522. print " </attribute>\n";
  523. }
  524. }
  525. }
  526. }
  527. }
  528. sub write_enum{
  529. my(@enume) = @_;
  530. for (my $j = 0; $j <= $#enume; $j++) {
  531. print " <enumeration value='$enume[$j]'/>\n";
  532. }
  533. }
  534. # Parse a string into an array of "words".
  535. # Words are whitespace-separated sequences of non-whitespace characters,
  536. # or quoted strings ("" or ''), with the quotes removed.
  537. # HACK: added () stuff for attlist stuff
  538. # Parse words for attribute list
  539. sub parsewords {
  540. my $line = $_[0];
  541. $line =~ s/(\(|\)|\|)/ $1 /g;
  542. my @words = ();
  543. while ($line ne '') {
  544. if ($line =~ /^\s+/) {
  545. # Skip whitespace
  546. } elsif ($line =~ /^\"((?:[^\"]|\\\")*)\"/) {
  547. push(@words, $1);
  548. } elsif ($line =~ /^\'((?:[^\']|\\\')*)\'/) {
  549. push(@words, $1);
  550. } elsif ($line =~ /^\S+/) {
  551. push(@words, $&);
  552. } else {
  553. die "Cannot happen\n";
  554. }
  555. $line = $';
  556. }
  557. return @words;
  558. }
  559. # Store content model, return empty string
  560. sub store_elt
  561. {
  562. my ($name, $model) = @_;
  563. $model =~ s/\s+/ /gso;
  564. push(@element, $name);
  565. my @words;
  566. while ($model =~ s/^\s*(\(|\)|,|\+|\?|\||[\w_\.-]+|\#\w+|\*)//) {
  567. push(@words, $1);
  568. };
  569. $model{$name} = [ @words ];
  570. return '';
  571. }
  572. # Store attribute list, return empty string
  573. sub store_att
  574. {
  575. my ($element, $atts) = @_;
  576. my @words = parsewords($atts);
  577. $attributes{$element} = [ @words ];
  578. return '';
  579. }
  580. sub write_simpleType{
  581. my($n, $b, $stuff) = @_;
  582. my @words = parsewords($stuff);
  583. print "\n <simpleType name='$n'>\n";
  584. print " <restriction base='$b'>\n";
  585. # print STDERR "\n==stuff:\n$stuff \n\n===\n", join('|', @words);
  586. my $i = 0;
  587. my @enume;
  588. if ($words[$i] eq "(") {
  589. $i++;
  590. while ($words[$i] ne ")") {
  591. if ($words[$i] ne "|") {
  592. push(@enume, $words[$i]);
  593. }
  594. $i++;
  595. }
  596. write_enum(@enume);
  597. }
  598. print " </restriction>\n";
  599. print " </simpleType>\n";
  600. }
  601. sub write_attrGroup{
  602. my($n, $stuff) = @_;
  603. my @words = parsewords($stuff);
  604. print "\n <attributeGroup name='$n'>\n";
  605. # print STDERR "\n==stuff:\n$stuff \n\n===\n", join('|', @words);
  606. printAttrDecls(@words);
  607. print " </attributeGroup>\n";
  608. }
  609. sub write_modelGroup{
  610. my($n, $stuff) = @_;
  611. my @words = parsewords($stuff);
  612. print "\n <group name='$n'>\n";
  613. print "<!-- $stuff -->\n";
  614. my @list = &makeChildList($n, '(', @words, ')');
  615. &printChildList(3, @list);
  616. $ModelGroup{$n} = \@list;
  617. print " </group>\n";
  618. }
  619. sub write_substitutionGroup
  620. {
  621. my($n, $stuff) = @_;
  622. my @words = parsewords($stuff);
  623. print "\n <element name='$n' abstract='true'>\n";
  624. my @list = &makeChildList($n, '(', @words, ')');
  625. for ( my $i = 0; $i < $#list; $i++ )
  626. {
  627. $SubstitutionGroup{ $list[$i] } = $n;
  628. }
  629. print " </element>\n";
  630. }
  631. sub isMixed{
  632. my(@model) = @_;
  633. my $isSimple = ($pcdata_flag eq 1) && ($model[1] eq '#PCDATA') &&
  634. ( ($#model eq 2) ||
  635. ( ($#model eq 3) && ($model[3] eq '*') ) );
  636. if ( $debug eq 1 ) {
  637. print STDERR "++ mixed? @model\n"; #@@
  638. }
  639. if ( $isSimple )
  640. {
  641. if ( $debug eq 1 )
  642. {
  643. print STDERR "++ no; simple type. @model\n"; #@@
  644. }
  645. return 0;
  646. }
  647. my($i);
  648. for ($i = 0; $i <= $#model; $i++) {
  649. if ( $model[$i] eq '#PCDATA' ||
  650. ($model[$i] eq '#MODELGROUPREF' && $Mixed{$model[$i+1]}) ||
  651. ($model[$i] eq '#SUBSTGROUPREF' && $Mixed{$model[$i+1]}) )
  652. {
  653. if ( $debug eq 1 ) {
  654. print STDERR "++ yes! $i @model\n"; #@@
  655. }
  656. return 1;
  657. }
  658. }
  659. if ( $debug eq 1 ) {
  660. print STDERR "++ no. @model\n"; #@@
  661. }
  662. return 0;
  663. }
  664. # Return maximum value of an array of numbers
  665. sub max
  666. {
  667. my $max = $_[0];
  668. foreach my $i (@_) {
  669. if ($i > $max) {$max = $i;}
  670. }
  671. return $max;
  672. }
  673. # 1) Open file
  674. # 2) Remove comment, processing instructions, and general entities
  675. # 3) Include external parameter entities recursively
  676. # 4) Return the contents of opened file
  677. sub openFile {
  678. my $file = $_[0];
  679. my %extent;
  680. my $bufbuf;
  681. if ($file ne "") {
  682. print STDERR "open $file ";
  683. if(! open AAA, $file) {
  684. print STDERR " failed!!\n";
  685. return "";
  686. }
  687. print STDERR " successful\n";
  688. $bufbuf = <AAA>;
  689. } else {
  690. print STDERR "open STDIN successful\n";
  691. $bufbuf = <>;
  692. }
  693. # remove comments
  694. $bufbuf =~ s/<!--.*?-->//gso;
  695. # remove processing instructions
  696. $bufbuf =~ s/<\?.*?>//gso;
  697. # store external parameter entities
  698. while ($bufbuf =~ s/<!ENTITY\s+%\s+(\S+)\s+PUBLIC\s+$str\s+$str.*?>//sie) {
  699. $extent{$1} = $4.$5;
  700. }
  701. while ($bufbuf =~ s/<!ENTITY\s+%\s+(\S+)\s+SYSTEM\s+$str.*?>//sie) {
  702. $extent{$1} = $2.$3;
  703. }
  704. # read external entity files
  705. foreach my $key (keys(%extent)) {
  706. $bufbuf =~ s/%$key;/openFile($extent{$key})/gsie;
  707. }
  708. return $bufbuf;
  709. }
  710. # $Log: dtd2xsd.pl,v $
  711. # Revision 1.17 2001/01/19 05:59:12 connolly
  712. # more changelog stuff; link to MH's announcement etc.
  713. #
  714. # Revision 1.16 2001/01/19 05:55:56 connolly
  715. # added Log at end
  716. #
  717. # Changes: 2001/01/10
  718. # Date: Thu, 11 Jan 2001 14:51:44 -0800
  719. # From: Mary Holstege <holstege@mathling.com>
  720. # To: xml-dev@lists.xml.org
  721. # Subject: [ANN] Updated version of DTD to XML Schema tool
  722. # http://lists.xml.org/archives/xml-dev/200101/msg00481.html
  723. # http://www.mathling.com/xmlschema/
  724. # Switch to CR syntax
  725. # Support external mapping file for type aliases, simple types, model and
  726. # attribute groups
  727. # Map ANY correctly to wildcard rather than element 'ANY'
  728. # Support treating lead PCDATA as string or other aliased simple type instead
  729. # of as mixed content (may be more appropriate for data-oriented DTDs)
  730. # e.g. <!ELEMENT title (#PCDATA)> => <element name="title" type="string"/>
  731. # Support subsitution groups.