/**
 * Copyright: Copyright (c) 2016 Wojciech Szęszoł. All rights reserved.
 * Authors: Wojciech Szęszoł
 * Version: Initial created: Jul 08, 2016
 * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0)
 */
module dstep.translator.Preprocessor;

import std.array;

import clang.Cursor;
import clang.Index;
import clang.SourceLocation;
import clang.SourceRange;
import clang.Token;
import clang.TranslationUnit;

public import dstep.translator.MacroDefinition;
public import dstep.translator.MacroParser;

/// Kinds of preprocessor directives distinguished by this module.
///
/// Note that `elif` is the first member and therefore the `.init` value of
/// the type; every parser below has to assign `Directive.kind` explicitly.
enum DirectiveKind
{
    elif,
    else_,
    endif,
    error,
    define,
    if_,
    ifdef,
    ifndef,
    include,
    line,
    undef,
    pragmaOnce,
}

/// Returns `true` for the kinds that open a conditional block
/// (`#if`, `#ifdef` and `#ifndef`).
bool isIf(DirectiveKind kind)
{
    return kind == DirectiveKind.if_ ||
        kind == DirectiveKind.ifdef ||
        kind == DirectiveKind.ifndef;
}

/// A single parsed preprocessor directive.
class Directive
{
    /// Tokens the directive was built from.
    Token[] tokens;

    /// Source range assigned by the parser that created the directive.
    SourceRange extent;

    /// Kind of the directive.
    DirectiveKind kind;

    /// Start of `extent`.
    @property SourceLocation location()
    {
        return extent.start;
    }

    override string toString()
    {
        import std.format : format;
        return format("Directive(kind = %s)", kind);
    }
}

/// A directive carrying a condition (`#if`, `#ifdef`, `#ifndef`, `#elif`).
///
/// `branches` and `endif` are filled in by `updateConditions`.
class ConditionalDirective : Directive
{
    /// Parsed condition expression.
    Expression condition;

    /// All branch directives of the block this directive belongs to.
    Directive[] branches;

    /// The `#endif` closing the block this directive belongs to.
    Directive endif;
}

/// A `#pragma` directive (currently only `#pragma once` is produced).
class PragmaDirective : Directive
{
}

/// A `#define` directive; forwards to the wrapped macro definition.
class DefineDirective : Directive
{
    MacroDefinition macroDefinition;
    alias macroDefinition this;

    this (MacroDefinition macroDefinition)
    {
        this.macroDefinition = macroDefinition;
    }
}

/// An `#undef` directive.
class UndefDirective : Directive
{
    /// Name of the macro being undefined.
    string identifier;
}

/// Input range splitting a token stream into per-directive token slices.
///
/// Every element (`front`) is the slice of tokens that makes up one
/// directive, starting at its `#`. Tokens that do not belong to a directive
/// are skipped.
struct TokenizedDirectiveRange
{
    /// Source text the tokens come from; may be `null`. Only used to detect
    /// escaped newlines between tokens.
    string source;

    /// Tokens that have not been consumed yet.
    Token[] tokens;

    /// Tokens of the current directive; empty when the range is exhausted.
    Token[] result;

    this(Token[] tokens, string source = null)
    {
        this.tokens = tokens;
        this.source = source;

        popFront();
    }

    @property bool empty() const
    {
        return result.empty;
    }

    @property Token[] front()
    {
        return result;
    }

    void popFront()
    {
        findNext();
        yield();
    }

    @property string toString() const
    {
        return "TokenizedDirectiveRange(..)";
    }

    /// Advances `tokens` so that it starts at the `#` of the next directive,
    /// or clears it if no further directive is found.
    private void findNext()
    {
        // Already positioned on a directive.
        if (1 < tokens.length &&
            tokens[0].spelling == "#" &&
            isDirective(tokens[1]))
            return;

        // Skip tokens until tokens[1] is a `#` that is the first token on its
        // line (the preceding token sits on an earlier line) and is followed
        // by a known directive name.
        while (2 < tokens.length &&
            (tokens[1].spelling != "#" ||
            tokens[0].location.line >= tokens[1].location.line ||
            !isDirective(tokens[2])))
            tokens.popFront();

        if (2 < tokens.length)
            tokens.popFront();
        else
            tokens = Token[].init;
    }

    /// Counts escaped newlines (a backslash immediately followed by `\n` or
    /// `\r\n`) in `range`.
    ///
    /// `yield` compares the result against the line distance between two
    /// neighbouring tokens to tell a continued directive from the end of one.
    private size_t countEscaped(string range)
    {
        size_t count = 0;

        foreach (index; 0 .. range.length)
        {
            if (range[index] != '\\')
                continue;

            auto rest = range[index + 1 .. $];

            immutable lf = rest.length >= 1 && rest[0] == '\n';
            immutable crlf = rest.length >= 2 &&
                rest[0] == '\r' &&
                rest[1] == '\n';

            if (lf || crlf)
                ++count;
        }

        return count;
    }

    /// Moves the tokens of the current directive from `tokens` to `result`.
    ///
    /// The directive ends at the first pair of neighbouring tokens that are
    /// on different lines, unless `source` is available and all the line
    /// breaks between them are escaped.
    private void yield()
    {
        result = Token[].init;

        size_t itr = 0;

        while (itr + 1 < tokens.length)
        {
            auto loc0 = tokens[itr].location;
            auto loc1 = tokens[itr + 1].location;

            ptrdiff_t diff = loc1.line - loc0.line;

            if (0 < diff &&
                (source == null ||
                countEscaped(source[loc0.offset .. loc1.offset]) < diff))
                break;

            ++itr;
        }

        if (itr + 1 < tokens.length)
        {
            result = tokens[0 .. itr + 1];
            tokens = tokens[itr + 1 .. $];
        }
        else
        {
            result = tokens;
            tokens = Token[].init;
        }
    }

    /// Tells whether the spelling of `token` is a recognized directive name.
    private static bool isDirective(Token token)
    {
        switch (token.spelling)
        {
            case "elif":
            case "else":
            case "endif":
            case "error":
            case "define":
            case "if":
            case "ifdef":
            case "ifndef":
            case "include":
            case "line":
            case "pragma":
            case "undef":
                return true;

            default:
                return false;
        }
    }
}

/// Splits `tokens` into per-directive slices. `source`, when given, is the
/// text the tokens were produced from.
TokenizedDirectiveRange tokenizedDirectives(Token[] tokens, string source = null)
{
    return TokenizedDirectiveRange(tokens, source);
}

/// Tokenizes `source` (without comments) and splits it into per-directive
/// slices.
TokenizedDirectiveRange tokenizedDirectives(string source)
{
    return tokenizedDirectives(tokenizeNoComments(source), source);
}

/// Input range of the directives that could be parsed from a translation
/// unit. Token groups that none of the parsers accept are skipped.
struct DirectiveRange
{
    TranslationUnit translUnit;
    TokenizedDirectiveRange tokensRange;
    Directive front_;

    this(TranslationUnit translUnit)
    {
        tokensRange = tokenizedDirectives(translUnit.tokensNoComments, translUnit.source);

        this.translUnit = translUnit;

        popFront();
    }

    @property bool empty() const
    {
        return front_ is null;
    }

    @property Directive front()
    {
        return front_;
    }

    void popFront()
    {
        front_ = parseDirective(tokensRange.front);
        tokensRange.popFront();

        // Keep going until something parses or the tokens run out.
        while (!tokensRange.empty && !front_)
        {
            front_ = parseDirective(tokensRange.front);
            tokensRange.popFront();
        }
    }

    @property string toString() const
    {
        return "DirectiveRange(..)";
    }

    /// Consumes a leading `#` followed by `spelling` from `tokens`.
    /// Returns `false` and leaves `tokens` untouched when they do not match.
    bool acceptDirective(alias spelling)(ref Token[] tokens)
    {
        if (1 < tokens.length &&
            tokens[0].spelling == "#" &&
            tokens[1].spelling == spelling)
        {
            tokens = tokens[2 .. $];
            return true;
        }

        return false;
    }

    /// Parses `#if <expr>`; the expression has to consume all the tokens.
    Expression parseIf(Token[] tokens)
    {
        if (acceptDirective!"if"(tokens))
        {
            auto expr = parseExpr(tokens, true);

            if (expr.hasValue && tokens.empty)
                return expr;
        }

        return Expression.init;
    }

    /// Parses `#ifdef X` into a `DefinedExpr` for `X`.
    Expression parseIfdef(Token[] tokens)
    {
        string spelling;

        if (tokens.length == 3 &&
            acceptDirective!"ifdef"(tokens) &&
            acceptIdentifier(tokens, spelling))
        {
            auto expr = DefinedExpr();
            expr.identifier = spelling;

            return Expression(expr);
        }

        return Expression.init;
    }

    /// Parses `#ifndef X` into a `!` `UnaryExpr` wrapping a `DefinedExpr`
    /// for `X`.
    Expression parseIfndef(Token[] tokens)
    {
        string spelling;

        if (tokens.length == 3 &&
            acceptDirective!"ifndef"(tokens) &&
            acceptIdentifier(tokens, spelling))
        {
            auto defined = DefinedExpr();
            defined.identifier = spelling;

            auto expr = new UnaryExpr();
            expr.subexpr = defined;
            expr.operator = "!";

            return Expression(expr);
        }

        return Expression.init;
    }

    /// Parses `#elif <expr>`; the expression has to consume all the tokens.
    Expression parseElif(Token[] tokens)
    {
        if (acceptDirective!"elif"(tokens))
        {
            auto expr = parseExpr(tokens, true);

            if (expr.hasValue && tokens.empty)
                return expr;
        }

        return Expression.init;
    }

    /// Tries `#if`, `#ifdef`, `#ifndef` and `#elif` in that order. On success
    /// stores the matching kind in `kind` and returns the condition;
    /// otherwise returns an `Expression` without a value.
    Expression parseIfCombined(ref DirectiveKind kind, Token[] tokens)
    {
        Expression expr;

        expr = parseIf(tokens);
        if (expr.hasValue)
        {
            kind = DirectiveKind.if_;
            return expr;
        }

        expr = parseIfdef(tokens);
        if (expr.hasValue)
        {
            kind = DirectiveKind.ifdef;
            return expr;
        }

        expr = parseIfndef(tokens);
        if (expr.hasValue)
        {
            kind = DirectiveKind.ifndef;
            return expr;
        }

        expr = parseElif(tokens);
        if (expr.hasValue)
        {
            kind = DirectiveKind.elif;
            return expr;
        }

        return Expression.init;
    }

    /// Matches a bare `#else`. `kind` is overwritten even when the match
    /// fails.
    bool parseElse(ref DirectiveKind kind, Token[] tokens)
    {
        kind = DirectiveKind.else_;
        return acceptDirective!"else"(tokens) && tokens.empty;
    }

    /// Matches a bare `#endif`. `kind` is overwritten even when the match
    /// fails.
    bool parseEndif(ref DirectiveKind kind, Token[] tokens)
    {
        kind = DirectiveKind.endif;
        return acceptDirective!"endif"(tokens) && tokens.empty;
    }

    /// Parses any of the conditional directives; returns `null` when the
    /// tokens are not one of them.
    private Directive parseConditional(Token[] tokens)
    {
        DirectiveKind kind;

        auto expr = parseIfCombined(kind, tokens);

        if (expr.hasValue)
        {
            auto directive = new ConditionalDirective;
            directive.kind = kind;
            directive.condition = expr;
            directive.tokens = tokens;
            directive.extent = tokens.extent;
            return directive;
        }

        if (parseElse(kind, tokens) || parseEndif(kind, tokens))
        {
            auto directive = new Directive;
            directive.kind = kind;
            directive.tokens = tokens;
            directive.extent = tokens.extent;
            return directive;
        }

        return null;
    }

    /// Parses `#define ...`; the macro definition has to consume all the
    /// tokens. Returns `null` otherwise.
    private Directive parseDefine(Token[] tokens, Cursor[string] table)
    {
        auto local = tokens;

        if (!accept!("#")(local, TokenKind.punctuation))
            return null;

        if (!accept!("define")(local, TokenKind.identifier))
            return null;

        MacroDefinition definition = parsePartialMacroDefinition(local, table, true);

        if (definition is null || !local.empty)
            return null;

        auto directive = new DefineDirective(definition);

        // Without this the kind stays at DirectiveKind.init (elif) and
        // updateConditions would treat a nested #define as a branch.
        directive.kind = DirectiveKind.define;
        directive.tokens = tokens;
        directive.extent = tokens.extent;

        return directive;
    }

    private Directive parseDefine(Token[] tokens)
    {
        Cursor[string] table;

        return parseDefine(tokens, table);
    }

    /// Not implemented; `#error` never produces a directive.
    private Directive parseError(Token[] tokens)
    {
        return null;
    }

    /// Parses `#undef X`; returns `null` when the tokens do not match.
    Directive parseUndef(Token[] tokens)
    {
        // acceptDirective consumes from `tokens`, keep the whole directive.
        auto whole = tokens;

        string spelling;

        if (tokens.length == 3 &&
            acceptDirective!"undef"(tokens) &&
            acceptIdentifier(tokens, spelling))
        {
            auto directive = new UndefDirective();

            // Without this the kind stays at DirectiveKind.init (elif) and
            // updateConditions would treat a nested #undef as a branch.
            directive.kind = DirectiveKind.undef;
            directive.identifier = spelling;
            directive.tokens = whole;
            directive.extent = whole.extent;

            return directive;
        }

        return null;
    }

    /// Not implemented; `#include` never produces a directive.
    Directive parseInclude(Token[] tokens)
    {
        return null;
    }

    /// Not implemented; `#line` never produces a directive.
    Directive parseLine(Token[] tokens)
    {
        return null;
    }

    /// Parses `#pragma once`; any other pragma yields `null`.
    ///
    /// NOTE(review): `tokens` and `extent` of the result cover only what
    /// follows `#pragma` (acceptDirective strips the first two tokens),
    /// unlike the other directives. Left as is in case callers rely on it.
    Directive parsePragma(Token[] tokens)
    {
        if (acceptDirective!"pragma"(tokens))
        {
            if (tokens.length == 1)
            {
                if (tokens[0].spelling == "once")
                {
                    auto directive = new PragmaDirective();
                    directive.kind = DirectiveKind.pragmaOnce;
                    directive.tokens = tokens;
                    directive.extent = tokens.extent;
                    return directive;
                }
            }
        }

        return null;
    }

    /// Runs the individual parsers in a fixed order and returns the first
    /// directive produced, or `null` when none of them accepts the tokens.
    private Directive parseDirective(Token[] tokens)
    {
        if (auto directive = parseConditional(tokens))
            return directive;
        else if (auto directive = parseDefine(tokens))
            return directive;
        else if (auto directive = parseError(tokens))
            return directive;
        else if (auto directive = parseInclude(tokens))
            return directive;
        else if (auto directive = parseLine(tokens))
            return directive;
        else if (auto directive = parsePragma(tokens))
            return directive;
        else if (auto directive = parseUndef(tokens))
            return directive;
        else
            return null;
    }
}

/// Links the conditional directives of every `#if ... #endif` block: each
/// `ConditionalDirective` of a block receives the list of the block's
/// branches and its closing `#endif`. Blocks without an `#endif` are left
/// untouched.
void updateConditions(Directive[] directives)
{
    // Handles one block; `directives.front` is its opening directive.
    void update(ref Directive[] directives)
    {
        Directive[] branches = [ directives.front ];
        directives.popFront();

        while (!directives.empty)
        {
            if (directives.front.kind == DirectiveKind.endif)
            {
                foreach (branch; branches)
                {
                    if (auto conditional = cast(ConditionalDirective) branch)
                    {
                        conditional.branches = branches;
                        conditional.endif = directives.front;
                    }
                }

                directives.popFront();
                break;
            }
            else if (
                directives.front.kind == DirectiveKind.elif ||
                directives.front.kind == DirectiveKind.else_)
            {
                branches ~= directives.front;
                directives.popFront();
            }
            else if (directives.front.kind.isIf)
            {
                // Nested block.
                update(directives);
            }
            else
            {
                directives.popFront();
            }
        }
    }

    void updateTopLevel(ref Directive[] directives)
    {
        while (!directives.empty)
        {
            if (directives.front.kind.isIf)
                update(directives);
            else
                directives.popFront();
        }
    }

    updateTopLevel(directives);
}

/// Returns the directives of `translUnit` with the conditional blocks linked.
Directive[] directives(TranslationUnit translUnit)
{
    auto directives = DirectiveRange(translUnit).array;

    updateConditions(directives);

    return directives;
}

/// Parses `source` as a translation unit and returns its directives.
Directive[] directives(string source)
{
    import std.array : array;

    Index index = Index(false, false);

    return directives(TranslationUnit.parseString(index, source));
}

unittest
{
    import std.array : array;

    auto x0 = tokenizedDirectives("").array;

    assert(x0.length == 0);


    auto x1 = tokenizedDirectives("int x = 3;").array;

    assert(x1.length == 0);


    auto x2 = tokenizedDirectives(q"C
int x = 3;

int f()
{
    return 42;
}
C").array;

    assert(x2.length == 0);


    auto x3 = tokenizedDirectives(q"C
#define FOO
C").array;

    assert(x3.length == 1);


    auto x4 = tokenizedDirectives(q"C
#define FOO 0

#define BAR 1

#define BAZ 2
C").array;

    assert(x4.length == 3);


    auto x5 = tokenizedDirectives(q"C
#if FOO == 0

#elif FOO == 1

#else

#endif
C").array;

    assert(x5.length == 4);


    auto x6 = tokenizedDirectives(q"C
#ifdef FOO

#endif

#ifndef FOO

#endif
C").array;

    assert(x6.length == 4);


    auto x7 = tokenizedDirectives(q"C
#pragma once
#include <stdio.h>
#define FOO
C").array;

    assert(x7.length == 3);


    auto x8 = tokenizedDirectives(q"C
#pragma once
#line 44
C").array;

    assert(x8.length == 2);


    auto x9 = tokenizedDirectives("#pragma once").array;

    assert(x9.length == 1);
    assert(x9[0][0].spelling == "#");
    assert(x9[0][1].spelling == "pragma");
    assert(x9[0][2].spelling == "once");


    auto x10 = tokenizedDirectives(q"C
#define FOO 0
#define BAR 1
#define BAZ 2
C").array;

    assert(x10.length == 3);
    assert(x10[0][0].spelling == "#");
    assert(x10[0][1].spelling == "define");
    assert(x10[0][2].spelling == "FOO");
    assert(x10[0][3].spelling == "0");

    assert(x10.length == 3);
    assert(x10[1][0].spelling == "#");
    assert(x10[1][1].spelling == "define");
    assert(x10[1][2].spelling == "BAR");
    assert(x10[1][3].spelling == "1");

    assert(x10.length == 3);
    assert(x10[2][0].spelling == "#");
    assert(x10[2][1].spelling == "define");
    assert(x10[2][2].spelling == "BAZ");
    assert(x10[2][3].spelling == "2");
}

// Escaped newlines are counted, plain newlines and lone backslashes are not.
unittest
{
    TokenizedDirectiveRange range;

    assert(range.countEscaped("") == 0);
    assert(range.countEscaped("#define FOO 1\n") == 0);
    assert(range.countEscaped("#define FOO \\\n 1") == 1);
    assert(range.countEscaped("#define FOO \\\r\n 1 + \\\n 2") == 2);
    assert(range.countEscaped("trailing \\") == 0);
}

unittest
{
    auto x0 = directives(``);

    assert(x0.length == 0);
}

unittest
{
    auto x0 = directives(`#define FOO`);

    assert(x0.length == 1);
    assert(cast(DefineDirective) x0[0]);
    assert(x0[0].kind == DirectiveKind.define);

    auto foo = cast(DefineDirective) x0[0];

    assert(foo.macroDefinition.spelling == "FOO");
}

unittest
{
    auto x0 = directives(`#pragma once`);

    assert(x0.length == 1);
    assert(cast(PragmaDirective) x0[0]);

    auto foo = cast(PragmaDirective) x0[0];

    assert(foo.kind == DirectiveKind.pragmaOnce);
}

// #define and #undef nested in a conditional block are not branches of it.
unittest
{
    auto case0 = directives(`
    #if 1
    #define FOO
    #undef FOO
    #endif`);

    assert(case0.length == 4);
    assert(case0[1].kind == DirectiveKind.define);
    assert(case0[2].kind == DirectiveKind.undef);

    auto if0 = cast(ConditionalDirective) case0[0];

    assert(if0);
    assert(if0.branches.length == 1);
    assert(if0.endif == case0[3]);
}

// Test parsing of basic conditions.
unittest
{
    // `#ifndef X` has to come out as `!defined(X)`.
    {
        auto parsed = directives(`
    #ifndef FOO

    #endif`);

        assert(parsed.length == 2);
        assert(parsed[0].kind == DirectiveKind.ifndef);
        assert(parsed[1].kind == DirectiveKind.endif);

        auto opening = cast(ConditionalDirective) parsed[0];

        assert(opening);

        auto negation = opening.condition.peek!UnaryExpr;

        assert(negation);
        assert(negation.operator == "!");

        auto operand = negation.subexpr.peek!DefinedExpr;

        assert(operand);
        assert(operand.identifier == "FOO");
    }

    // For `#ifdef` and `#if` it is enough that a condition was parsed.
    foreach (input; [`
    #ifdef FOO

    #endif`, `
    #if 1

    #endif`])
    {
        auto parsed = directives(input);

        assert(parsed.length == 2);

        auto opening = cast(ConditionalDirective) parsed[0];

        assert(opening);
        assert(opening.condition.hasValue);
    }
}

// Test parsing of multi-branch directives.
unittest
{
    auto parsed = directives(`
    #if FOO

    #elif BAR

    #elif BAZ

    #else

    #endif`);

    DirectiveKind[] expectedKinds = [
        DirectiveKind.if_,
        DirectiveKind.elif,
        DirectiveKind.elif,
        DirectiveKind.else_,
        DirectiveKind.endif,
    ];

    assert(parsed.length == 5);

    foreach (position, expected; expectedKinds)
        assert(parsed[position].kind == expected);

    // The first three directives carry a condition made of one identifier.
    string[] expectedNames = ["FOO", "BAR", "BAZ"];

    foreach (position, expected; expectedNames)
    {
        auto conditional = cast(ConditionalDirective) parsed[position];

        assert(conditional);

        auto name = conditional.condition.peek!Identifier;

        assert(name);
        assert(name.spelling == expected);
    }
}

// Parse `defined` operator.
unittest
{
    // Operand without parentheses.
    {
        auto parsed = directives(`
    #if defined FOO

    #endif`);

        auto opening = cast(ConditionalDirective) parsed[0];

        assert(opening);
    }

    // Operand in parentheses.
    {
        auto parsed = directives(`
    #if defined(FOO)

    #endif`);

        auto opening = cast(ConditionalDirective) parsed[0];

        assert(opening);

        auto operand = opening.condition.peek!DefinedExpr;

        assert(operand);
        assert(operand.identifier == "FOO");
    }
}

// Test if branch pointers are arranged correctly.
unittest
{
    // Checks that head.branches[i] is all[indices[i]] for every listed index.
    static void assertBranches(
        ConditionalDirective head,
        Directive[] all,
        size_t[] indices)
    {
        foreach (slot, index; indices)
            assert(head.branches[slot] == all[index]);
    }

    // Single branch.
    {
        auto all = directives(`
    #if 1

    #endif`);

        auto head = cast(ConditionalDirective) all[0];

        assert(head);
        assert(head.branches.length == 1);
        assert(head.endif == all[1]);
    }

    // #if with #else.
    {
        auto all = directives(`
    #if 1

    #else

    #endif`);

        auto head = cast(ConditionalDirective) all[0];

        assert(head);
        assert(head.branches.length == 2);
        assert(head.endif == all[2]);
    }

    // #if, #elif and #else.
    {
        auto all = directives(`
    #if 1

    #elif defined FOO

    #else

    #endif`);

        auto head = cast(ConditionalDirective) all[0];

        assert(head);
        assert(head.branches.length == 3);
        assertBranches(head, all, [0, 1, 2]);
        assert(head.endif == all[3]);
    }

    // Two independent blocks separated by other directives.
    {
        auto all = directives(`
    #if 1

    #elif defined FOO

    #else

    #endif

    #define BAR
    #undef BAR
    #define BAZ

    #if 0

    #else

    #endif

    #define FUN(x, y) x + y`);

        assert(all.length == 11);

        auto first = cast(ConditionalDirective) all[0];
        auto second = cast(ConditionalDirective) all[7];

        assert(first);
        assert(second);

        assert(first.branches.length == 3);
        assert(second.branches.length == 2);

        assertBranches(first, all, [0, 1, 2]);
        assert(first.endif == all[3]);

        assertBranches(second, all, [7, 8]);
        assert(second.endif == all[9]);
    }

    // A block nested in the #elif branch of another one.
    {
        auto all = directives(`
    #if 1

    #elif defined FOO

    #ifdef BAR

    #else

    #endif

    #else

    #endif`);

        assert(all.length == 7);

        auto outer = cast(ConditionalDirective) all[0];
        auto inner = cast(ConditionalDirective) all[2];

        assertBranches(outer, all, [0, 1, 5]);
        assert(outer.endif == all[6]);

        assertBranches(inner, all, [2, 3]);
        assert(inner.endif == all[4]);
    }
}

// A case with space between directive and comment.
unittest
{
    auto groups = tokenizedDirectives(`
    /* Header comment. */

    #ifndef __FOO
    #define __FOO

    /* Comment before variable. */
    int variable;

    #endif`).array;

    assert(groups.length == 3);
}

// A case with comment after directive.
unittest
{
    auto groups = tokenizedDirectives(`
    #ifndef FOO /* Comment. */

    #endif`).array;

    assert(groups.length == 2);

    // The comment must not end up among the tokens of the directive.
    assert(groups[0].length == 3);
}