1 /**
2  * Copyright: Copyright (c) 2016 Wojciech Szęszoł. All rights reserved.
3  * Authors: Wojciech Szęszoł
4  * Version: Initial created: Jul 08, 2016
5  * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost Software License 1.0)
6  */
7 module dstep.translator.Preprocessor;
8 
9 import std.array;
10 
11 import clang.Cursor;
12 import clang.Index;
13 import clang.SourceLocation;
14 import clang.SourceRange;
15 import clang.Token;
16 import clang.TranslationUnit;
17 
18 public import dstep.translator.MacroDefinition;
19 public import dstep.translator.MacroParser;
20 
/**
 * Kinds of preprocessor directives distinguished by this module.
 *
 * `elif` is the first member and therefore the value of a `DirectiveKind`
 * that has never been assigned.
 */
enum DirectiveKind
{
    /// `#elif`
    elif,

    /// `#else`
    else_,

    /// `#endif`
    endif,

    /// `#error`
    error,

    /// `#define`
    define,

    /// `#if`
    if_,

    /// `#ifdef`
    ifdef,

    /// `#ifndef`
    ifndef,

    /// `#include`
    include,

    /// `#line`
    line,

    /// `#undef`
    undef,

    /// `#pragma once`
    pragmaOnce,
}
36 
/**
 * Tells whether `kind` opens a conditional block.
 *
 * Returns: `true` for `if_`, `ifdef` and `ifndef`, `false` for every other
 *          kind.
 */
bool isIf(DirectiveKind kind)
{
    switch (kind)
    {
        case DirectiveKind.if_:
        case DirectiveKind.ifdef:
        case DirectiveKind.ifndef:
            return true;

        default:
            return false;
    }
}
43 
/**
 * Base class of all parsed preprocessor directives.
 */
class Directive
{
    /// Tokens stored for the directive by the parser.
    Token[] tokens;

    /// Source range stored for the directive by the parser.
    SourceRange extent;

    /// Kind of the directive.
    DirectiveKind kind;

    /// Start of `extent`.
    @property SourceLocation location()
    {
        auto start = extent.start;
        return start;
    }

    /// Returns: a short description containing only the directive kind.
    override string toString()
    {
        import std.format : format;

        return "Directive(kind = %s)".format(kind);
    }
}
61 
/**
 * A directive that carries a condition.
 *
 * `branches` and `endif` are not set by the parser; `updateConditions` fills
 * them in once the matching `endif` directive has been found.
 */
class ConditionalDirective : Directive
{
    /// The parsed condition expression.
    Expression condition;

    /// The opening directive followed by the `elif`/`else_` directives of
    /// the same block, in source order.
    Directive[] branches;

    /// The `endif` directive closing the block.
    Directive endif;
}
68 
/// A `#pragma` directive; it adds no members to `Directive`.
class PragmaDirective : Directive
{
}
72 
/**
 * A `#define` directive wrapping the parsed macro definition.
 *
 * Members not found on the directive itself are forwarded to
 * `macroDefinition` through `alias this`.
 */
class DefineDirective : Directive
{
    /// The macro definition parsed from the directive.
    MacroDefinition macroDefinition;

    alias macroDefinition this;

    /// Creates a directive wrapping `macroDefinition`.
    this (MacroDefinition macroDefinition)
    {
        this.macroDefinition = macroDefinition;
    }
}
83 
/// An `#undef` directive.
class UndefDirective : Directive
{
    /// Spelling of the identifier named by the directive.
    string identifier;
}
88 
/**
 * Input range that splits a flat token stream into groups of tokens, one
 * group per preprocessor directive. Every group starts with the `#` token.
 *
 * Tokens that do not belong to a recognised directive are skipped. When
 * `source` is given, it is used to recognise backslash-newline line
 * continuations, so that a directive spanning several physical lines is
 * yielded as a single group. Without `source` a directive always ends at
 * the end of its first line.
 */
struct TokenizedDirectiveRange
{
    /// Source text the token offsets refer to; may be `null`.
    string source;

    /// Tokens that have not been consumed yet.
    Token[] tokens;

    /// Tokens of the current directive; empty when the range is exhausted.
    Token[] result;

    /**
     * Creates the range and positions it on the first directive.
     *
     * Params:
     *  tokens = token stream to split
     *  source = text the tokens were produced from, or `null`
     */
    this(Token[] tokens, string source = null)
    {
        this.tokens = tokens;
        this.source = source;

        popFront();
    }

    /// `true` when there is no current directive.
    @property bool empty() const
    {
        return result.empty;
    }

    /// Tokens of the current directive.
    @property Token[] front()
    {
        return result;
    }

    /// Advances to the next directive, skipping non-directive tokens.
    void popFront()
    {
        findNext();
        yield();
    }

    /// Returns: a fixed placeholder; the tokens are not printed.
    @property string toString() const
    {
        return "TokenizedDirectiveRange(..)";
    }

    /**
     * Drops tokens until `tokens` starts with `#` followed by a known
     * directive name. Clears `tokens` when no such position exists.
     */
    private void findNext()
    {
        // Already positioned on a directive. No line check is made here.
        if (1 < tokens.length &&
            tokens[0].spelling == "#" &&
            isDirective(tokens[1]))
            return;

        // Advance until tokens[1] is a `#` that is located on a later line
        // than tokens[0] and is followed by a directive name.
        while (2 < tokens.length &&
            (tokens[1].spelling != "#" ||
            tokens[0].location.line >= tokens[1].location.line ||
            !isDirective(tokens[2])))
            tokens.popFront();

        if (2 < tokens.length)
            tokens.popFront(); // drop tokens[0] so that `#` is at the front
        else
            tokens = Token[].init;
    }

    /**
     * Counts the backslash-newline line continuations in `range`.
     *
     * A continuation is a backslash immediately followed by `\n` or by
     * `\r\n`.
     */
    private size_t countEscaped(string range)
    {
        size_t count = 0;

        foreach (index; 0 .. range.length)
        {
            if (range[index] != '\\')
                continue;

            size_t next = index + 1;

            if (next < range.length && range[next] == '\r')
                ++next;

            if (next < range.length && range[next] == '\n')
                ++count;
        }

        return count;
    }

    /**
     * Moves the tokens of the logical line at the front of `tokens` into
     * `result`.
     *
     * The line ends between two tokens when the second one is located on a
     * later line and the text between them contains fewer line
     * continuations than the number of lines crossed.
     */
    private void yield()
    {
        result = Token[].init;

        size_t itr = 0;

        while (itr + 1 < tokens.length)
        {
            auto loc0 = tokens[itr].location;
            auto loc1 = tokens[itr + 1].location;

            ptrdiff_t diff = loc1.line - loc0.line;

            if (0 < diff &&
                (source == null ||
                countEscaped(source[loc0.offset .. loc1.offset]) < diff))
                break;

            ++itr;
        }

        if (itr + 1 < tokens.length)
        {
            result = tokens[0 .. itr + 1];
            tokens = tokens[itr + 1 .. $];
        }
        else
        {
            // The line runs to the end of the stream (or the stream is
            // empty).
            result = tokens;
            tokens = Token[].init;
        }
    }

    /// Tells whether the spelling of `token` is a recognised directive name.
    private static bool isDirective(Token token)
    {
        switch (token.spelling)
        {
            case "elif":
            case "else":
            case "endif":
            case "error":
            case "define":
            case "if":
            case "ifdef":
            case "ifndef":
            case "include":
            case "line":
            case "pragma":
            case "undef":
                return true;

            default:
                return false;
        }
    }
}
204 
/**
 * Creates a range of per-directive token groups over `tokens`.
 *
 * Params:
 *  tokens = token stream to split
 *  source = text the tokens were produced from, or `null`
 */
TokenizedDirectiveRange tokenizedDirectives(Token[] tokens, string source = null)
{
    auto range = TokenizedDirectiveRange(tokens, source);

    return range;
}
209 
/**
 * Tokenizes `source` with `tokenizeNoComments` and creates a range of
 * per-directive token groups over the result.
 */
TokenizedDirectiveRange tokenizedDirectives(string source)
{
    auto tokens = tokenizeNoComments(source);

    return tokenizedDirectives(tokens, source);
}
214 
/**
 * Input range over the preprocessor directives of a translation unit.
 *
 * Token groups produced by `TokenizedDirectiveRange` are parsed one by one;
 * groups that cannot be parsed into a `Directive` are skipped.
 */
struct DirectiveRange
{
    /// The translation unit the directives are read from.
    TranslationUnit translUnit;

    /// Remaining, not yet parsed, token groups.
    TokenizedDirectiveRange tokensRange;

    /// The current directive; `null` when the range is exhausted.
    Directive front_;

    /// Creates the range and positions it on the first parsable directive.
    this(TranslationUnit translUnit)
    {
        tokensRange = tokenizedDirectives(translUnit.tokensNoComments, translUnit.source);

        this.translUnit = translUnit;

        popFront();
    }

    /// `true` when there is no current directive.
    @property bool empty() const
    {
        return front_ is null;
    }

    /// The current directive.
    @property Directive front()
    {
        return front_;
    }

    /// Advances to the next token group that parses into a directive.
    void popFront()
    {
        // The current token group is always consumed, then further groups
        // are consumed for as long as parsing yields nothing.
        do
        {
            front_ = parseDirective(tokensRange.front);
            tokensRange.popFront();
        }
        while (!tokensRange.empty && !front_);
    }

    /// Returns: a fixed placeholder; the directives are not printed.
    @property string toString() const
    {
        return "DirectiveRange(..)";
    }

    /**
     * Consumes `#` followed by `spelling` from the front of `tokens`.
     *
     * Returns: `true` when both tokens were present and have been removed;
     *          otherwise `false`, with `tokens` left untouched.
     */
    bool acceptDirective(alias spelling)(ref Token[] tokens)
    {
        if (1 < tokens.length &&
            tokens[0].spelling == "#" &&
            tokens[1].spelling == spelling)
        {
            tokens = tokens[2 .. $];
            return true;
        }

        return false;
    }

    /**
     * Parses `#if <expression>`.
     *
     * Returns: the condition when the expression is parsed and consumes all
     *          of the tokens, `Expression.init` otherwise.
     */
    Expression parseIf(Token[] tokens)
    {
        if (acceptDirective!"if"(tokens))
        {
            auto expr = parseExpr(tokens, true);

            if (expr.hasValue && tokens.empty)
                return expr;
        }

        return Expression.init;
    }

    /**
     * Parses `#ifdef <identifier>` (exactly three tokens).
     *
     * Returns: a `DefinedExpr` for the identifier, or `Expression.init`.
     */
    Expression parseIfdef(Token[] tokens)
    {
        string spelling;

        if (tokens.length == 3 &&
            acceptDirective!"ifdef"(tokens) &&
            acceptIdentifier(tokens, spelling))
        {
            auto expr = DefinedExpr();
            expr.identifier = spelling;

            return Expression(expr);
        }

        return Expression.init;
    }

    /**
     * Parses `#ifndef <identifier>` (exactly three tokens).
     *
     * Returns: a `UnaryExpr` applying `!` to a `DefinedExpr` for the
     *          identifier, or `Expression.init`.
     */
    Expression parseIfndef(Token[] tokens)
    {
        string spelling;

        if (tokens.length == 3 &&
            acceptDirective!"ifndef"(tokens) &&
            acceptIdentifier(tokens, spelling))
        {
            auto defined = DefinedExpr();
            defined.identifier = spelling;

            auto expr = new UnaryExpr();
            expr.subexpr = defined;
            expr.operator = "!";

            return Expression(expr);
        }

        return Expression.init;
    }

    /**
     * Parses `#elif <expression>`.
     *
     * Returns: the condition when the expression is parsed and consumes all
     *          of the tokens, `Expression.init` otherwise.
     */
    Expression parseElif(Token[] tokens)
    {
        if (acceptDirective!"elif"(tokens))
        {
            auto expr = parseExpr(tokens, true);

            if (expr.hasValue && tokens.empty)
                return expr;
        }

        return Expression.init;
    }

    /**
     * Tries `#if`, `#ifdef`, `#ifndef` and `#elif` in that order.
     *
     * Params:
     *  kind = set to the kind of the directive that matched; left untouched
     *         when none does
     *  tokens = tokens of one directive
     *
     * Returns: the parsed condition, or `Expression.init` when none of the
     *          four directives matches.
     */
    Expression parseIfCombined(ref DirectiveKind kind, Token[] tokens)
    {
        Expression expr;

        expr = parseIf(tokens);
        if (expr.hasValue)
        {
            kind = DirectiveKind.if_;
            return expr;
        }

        expr = parseIfdef(tokens);
        if (expr.hasValue)
        {
            kind = DirectiveKind.ifdef;
            return expr;
        }

        expr = parseIfndef(tokens);
        if (expr.hasValue)
        {
            kind = DirectiveKind.ifndef;
            return expr;
        }

        expr = parseElif(tokens);
        if (expr.hasValue)
        {
            kind = DirectiveKind.elif;
            return expr;
        }

        return Expression.init;
    }

    /**
     * Matches a bare `#else`.
     *
     * `kind` is set to `else_` whether or not the tokens match.
     */
    bool parseElse(ref DirectiveKind kind, Token[] tokens)
    {
        kind = DirectiveKind.else_;
        return acceptDirective!"else"(tokens) && tokens.empty;
    }

    /**
     * Matches a bare `#endif`.
     *
     * `kind` is set to `endif` whether or not the tokens match.
     */
    bool parseEndif(ref DirectiveKind kind, Token[] tokens)
    {
        kind = DirectiveKind.endif;
        return acceptDirective!"endif"(tokens) && tokens.empty;
    }

    /**
     * Parses `#if`, `#ifdef`, `#ifndef`, `#elif`, `#else` and `#endif`.
     *
     * Returns: a `ConditionalDirective` for the directives that carry a
     *          condition, a plain `Directive` for `#else` and `#endif`, or
     *          `null` when the tokens are none of these.
     */
    private Directive parseConditional(Token[] tokens)
    {
        DirectiveKind kind;

        auto expr = parseIfCombined(kind, tokens);

        if (expr.hasValue)
        {
            auto directive = new ConditionalDirective;
            directive.kind = kind;
            directive.condition = expr;
            directive.tokens = tokens;
            directive.extent = tokens.extent;
            return directive;
        }

        if (parseElse(kind, tokens) || parseEndif(kind, tokens))
        {
            auto directive = new Directive;
            directive.kind = kind;
            directive.tokens = tokens;
            directive.extent = tokens.extent;
            return directive;
        }

        return null;
    }

    /**
     * Parses `#define ...` using `parsePartialMacroDefinition`.
     *
     * Returns: a `DefineDirective` when the macro definition is parsed and
     *          consumes all of the tokens, `null` otherwise.
     */
    private Directive parseDefine(Token[] tokens, Cursor[string] table)
    {
        auto local = tokens;

        if (!accept!("#")(local, TokenKind.punctuation))
            return null;

        if (!accept!("define")(local, TokenKind.identifier))
            return null;

        MacroDefinition result = parsePartialMacroDefinition(local, table, true);

        if (result is null || !local.empty)
            return null;

        // Without an explicit kind the directive would report the default
        // `DirectiveKind.elif`, and `updateConditions` would treat a
        // `#define` nested in a conditional block as one of its branches.
        auto directive = new DefineDirective(result);
        directive.kind = DirectiveKind.define;
        directive.tokens = tokens;
        directive.extent = tokens.extent;
        return directive;
    }

    /// Parses `#define ...` with an empty cursor table.
    private Directive parseDefine(Token[] tokens)
    {
        Cursor[string] table;

        return parseDefine(tokens, table);
    }

    /// Not implemented: always returns `null`.
    private Directive parseError(Token[] tokens)
    {
        return null;
    }

    /**
     * Parses `#undef <identifier>` (exactly three tokens).
     *
     * Returns: an `UndefDirective`, or `null` when the tokens do not match.
     */
    Directive parseUndef(Token[] tokens)
    {
        string spelling;
        auto local = tokens;

        if (local.length == 3 &&
            acceptDirective!"undef"(local) &&
            acceptIdentifier(local, spelling))
        {
            // The kind has to be set explicitly for the same reason as in
            // `parseDefine`: the default is `DirectiveKind.elif`.
            auto directive = new UndefDirective();
            directive.kind = DirectiveKind.undef;
            directive.identifier = spelling;
            directive.tokens = tokens;
            directive.extent = tokens.extent;
            return directive;
        }

        return null;
    }

    /// Not implemented: always returns `null`.
    Directive parseInclude(Token[] tokens)
    {
        return null;
    }

    /// Not implemented: always returns `null`.
    Directive parseLine(Token[] tokens)
    {
        return null;
    }

    /**
     * Parses `#pragma once`.
     *
     * Returns: a `PragmaDirective` of kind `pragmaOnce`, or `null` for any
     *          other input, including other pragmas.
     */
    Directive parsePragma(Token[] tokens)
    {
        if (acceptDirective!"pragma"(tokens))
        {
            if (tokens.length == 1)
            {
                if (tokens[0].spelling == "once")
                {
                    // NOTE(review): `acceptDirective` has already removed
                    // `# pragma` from `tokens`, so the stored tokens and
                    // extent cover only `once`, unlike the conditional
                    // directives, which keep the leading `#`. Left
                    // unchanged in case callers rely on it - confirm.
                    auto directive = new PragmaDirective();
                    directive.kind = DirectiveKind.pragmaOnce;
                    directive.tokens = tokens;
                    directive.extent = tokens.extent;
                    return directive;
                }
            }
        }

        return null;
    }

    /**
     * Tries every directive parser in turn.
     *
     * Returns: the first directive successfully parsed from `tokens`, or
     *          `null` when no parser accepts them.
     */
    private Directive parseDirective(Token[] tokens)
    {
        if (auto directive = parseConditional(tokens))
            return directive;
        else if (auto directive = parseDefine(tokens))
            return directive;
        else if (auto directive = parseError(tokens))
            return directive;
        else if (auto directive = parseInclude(tokens))
            return directive;
        else if (auto directive = parseLine(tokens))
            return directive;
        else if (auto directive = parsePragma(tokens))
            return directive;
        else if (auto directive = parseUndef(tokens))
            return directive;
        else
            return null;
    }
}
509 
/**
 * Links the directives of every conditional block together.
 *
 * For each block that is closed by an `endif`, every `ConditionalDirective`
 * among its branches receives the list of branches (the opening directive
 * followed by the `elif`/`else_` directives of the same nesting level) and
 * a reference to the closing `endif` directive. Blocks without a closing
 * `endif` are left unlinked.
 *
 * Params:
 *  directives = directives in source order
 */
void updateConditions(Directive[] directives)
{
    // Consumes one conditional block, starting at its opening directive
    // and ending just after the matching `endif`.
    void linkBlock(ref Directive[] pending)
    {
        Directive[] chain = [ pending.front ];
        pending.popFront();

        while (!pending.empty)
        {
            auto current = pending.front;

            // A nested block is consumed as a whole by the recursive call.
            if (current.kind.isIf)
            {
                linkBlock(pending);
                continue;
            }

            pending.popFront();

            if (current.kind == DirectiveKind.endif)
            {
                foreach (member; chain)
                {
                    auto conditional = cast(ConditionalDirective) member;

                    if (conditional is null)
                        continue;

                    conditional.branches = chain;
                    conditional.endif = current;
                }

                return;
            }

            if (current.kind == DirectiveKind.elif ||
                current.kind == DirectiveKind.else_)
                chain ~= current;
        }
    }

    auto remaining = directives;

    while (!remaining.empty)
    {
        if (remaining.front.kind.isIf)
            linkBlock(remaining);
        else
            remaining.popFront();
    }
}
564 
/**
 * Collects the directives of `translUnit` into an array and links the
 * conditional blocks with `updateConditions`.
 */
Directive[] directives(TranslationUnit translUnit)
{
    Directive[] parsed = DirectiveRange(translUnit).array;

    updateConditions(parsed);

    return parsed;
}
573 
/**
 * Parses `source` into a translation unit and returns its directives.
 */
Directive[] directives(string source)
{
    import std.array : array;

    auto index = Index(false, false);
    auto translUnit = TranslationUnit.parseString(index, source);

    return directives(translUnit);
}
582 
// Splitting of source text into per-directive token groups.
unittest
{
    import std.array : array;

    // Empty source: no groups.
    auto x0 = tokenizedDirectives("").array;

    assert(x0.length == 0);


    // Source without any directive: no groups.
    auto x1 = tokenizedDirectives("int x = 3;").array;

    assert(x1.length == 0);


    // Multi-line source without any directive: no groups.
    auto x2 = tokenizedDirectives(q"C
int x = 3;

int f()
{
    return 42;
}
C").array;

    assert(x2.length == 0);


    // A single directive.
    auto x3 = tokenizedDirectives(q"C
#define FOO
C").array;

    assert(x3.length == 1);


    // Directives separated by blank lines.
    auto x4 = tokenizedDirectives(q"C
#define FOO 0

#define BAR 1

#define BAZ 2
C").array;

    assert(x4.length == 3);


    // Every directive of an if/elif/else/endif block is a separate group.
    auto x5 = tokenizedDirectives(q"C
#if FOO == 0

#elif FOO == 1

#else

#endif
C").array;

    assert(x5.length == 4);


    // Two consecutive conditional blocks.
    auto x6 = tokenizedDirectives(q"C
#ifdef FOO

#endif

#ifndef FOO

#endif
C").array;

    assert(x6.length == 4);


    // Directives on consecutive lines, including pragma and include.
    auto x7 = tokenizedDirectives(q"C
#pragma once
#include <stdio.h>
#define FOO
C").array;

    assert(x7.length == 3);


    // The line directive is recognised as well.
    auto x8 = tokenizedDirectives(q"C
#pragma once
#line 44
C").array;

    assert(x8.length == 2);


    // A directive without a trailing newline; the group starts at `#`.
    auto x9 = tokenizedDirectives("#pragma once").array;

    assert(x9.length == 1);
    assert(x9[0][0].spelling == "#");
    assert(x9[0][1].spelling == "pragma");
    assert(x9[0][2].spelling == "once");


    // Tokens are assigned to the group of the line they are located on.
    auto x10 = tokenizedDirectives(q"C
#define FOO 0
#define BAR 1
#define BAZ 2
C").array;

    assert(x10.length == 3);
    assert(x10[0][0].spelling == "#");
    assert(x10[0][1].spelling == "define");
    assert(x10[0][2].spelling == "FOO");
    assert(x10[0][3].spelling == "0");

    assert(x10.length == 3);
    assert(x10[1][0].spelling == "#");
    assert(x10[1][1].spelling == "define");
    assert(x10[1][2].spelling == "BAR");
    assert(x10[1][3].spelling == "1");

    assert(x10.length == 3);
    assert(x10[2][0].spelling == "#");
    assert(x10[2][1].spelling == "define");
    assert(x10[2][2].spelling == "BAZ");
    assert(x10[2][3].spelling == "2");
}
702 
// Empty source contains no directives.
unittest
{
    auto x0 = directives(``);

    assert(x0.length == 0);
}
709 
// An object-like macro without a body is parsed into a DefineDirective.
unittest
{
    auto x0 = directives(`#define FOO`);

    assert(x0.length == 1);
    assert(cast(DefineDirective) x0[0]);

    auto foo = cast(DefineDirective) x0[0];

    assert(foo.macroDefinition.spelling == "FOO");
}
721 
// `#pragma once` is parsed into a PragmaDirective of kind pragmaOnce.
unittest
{
    auto x0 = directives(`#pragma once`);

    assert(x0.length == 1);
    assert(cast(PragmaDirective) x0[0]);

    auto foo = cast(PragmaDirective) x0[0];

    assert(foo.kind == DirectiveKind.pragmaOnce);
}
733 
// Test parsing of basic conditions.
unittest
{
    // `#ifndef FOO` becomes `!` applied to `defined FOO`.
    auto case0 = directives(`
    #ifndef FOO

    #endif`);

    assert(case0.length == 2);
    assert(case0[0].kind == DirectiveKind.ifndef);
    assert(case0[1].kind == DirectiveKind.endif);

    auto cond0 = cast(ConditionalDirective) case0[0];

    assert(cond0);

    auto unary0 = cond0.condition.peek!UnaryExpr;

    assert(unary0);
    assert(unary0.operator == "!");

    auto defined0 = unary0.subexpr.peek!DefinedExpr;

    assert(defined0);
    assert(defined0.identifier == "FOO");


    // `#ifdef FOO` yields a conditional directive with a condition.
    auto case1 = directives(`
    #ifdef FOO

    #endif`);

    assert(case1.length == 2);
    assert(cast(ConditionalDirective) case1[0]);

    auto cond1 = cast(ConditionalDirective) case1[0];

    assert(cond1);
    assert(cond1.condition.hasValue);


    // `#if` with a literal yields a conditional directive with a condition.
    auto case2 = directives(`
    #if 1

    #endif`);

    assert(case2.length == 2);
    assert(cast(ConditionalDirective) case2[0]);

    auto cond2 = cast(ConditionalDirective) case2[0];

    assert(cond2);
    assert(cond2.condition.hasValue);
}
788 
// Test parsing of multi-branch directives.
unittest
{
    auto case0 = directives(`
    #if FOO

    #elif BAR

    #elif BAZ

    #else

    #endif`);

    assert(case0.length == 5);

    // Kinds are reported in source order.
    assert(case0[0].kind == DirectiveKind.if_);
    assert(case0[1].kind == DirectiveKind.elif);
    assert(case0[2].kind == DirectiveKind.elif);
    assert(case0[3].kind == DirectiveKind.else_);
    assert(case0[4].kind == DirectiveKind.endif);

    // The `#if` and both `#elif` directives carry a condition.
    auto cond0 = cast(ConditionalDirective) case0[0];
    auto cond1 = cast(ConditionalDirective) case0[1];
    auto cond2 = cast(ConditionalDirective) case0[2];

    assert(cond0);
    assert(cond1);
    assert(cond2);

    // Each condition is the bare identifier written in the source.
    auto id0 = cond0.condition.peek!Identifier;
    auto id1 = cond1.condition.peek!Identifier;
    auto id2 = cond2.condition.peek!Identifier;

    assert(id0);
    assert(id0.spelling == "FOO");
    assert(id1);
    assert(id1.spelling == "BAR");
    assert(id2);
    assert(id2.spelling == "BAZ");
}
830 
// Parse `defined` operator.
unittest
{
    // Form without parentheses.
    auto case0 = directives(`
    #if defined FOO

    #endif`);

    auto cond0 = cast(ConditionalDirective) case0[0];

    assert(cond0);


    // Form with parentheses; the condition is a DefinedExpr naming FOO.
    auto case1 = directives(`
    #if defined(FOO)

    #endif`);

    auto cond1 = cast(ConditionalDirective) case1[0];

    assert(cond1);

    auto expr = cond1.condition.peek!DefinedExpr;

    assert(expr);
    assert(expr.identifier == "FOO");
}
858 
// Test if branch pointers are arranged correctly.
unittest
{
    // A block with a single branch.
    auto case0 = directives(`
    #if 1

    #endif`);

    auto if0 = cast(ConditionalDirective) case0[0];

    assert(if0);
    assert(if0.branches.length == 1);
    assert(if0.endif == case0[1]);


    // The `#else` directive is counted as a branch.
    auto case1 = directives(`
    #if 1

    #else

    #endif`);

    auto if1 = cast(ConditionalDirective) case1[0];

    assert(if1);
    assert(if1.branches.length == 2);
    assert(if1.endif == case1[2]);


    // Branches are listed in source order, starting with the `#if` itself.
    auto case2 = directives(`
    #if 1

    #elif defined FOO

    #else

    #endif`);

    auto if2 = cast(ConditionalDirective) case2[0];

    assert(if2);
    assert(if2.branches.length == 3);
    assert(if2.branches[0] == case2[0]);
    assert(if2.branches[1] == case2[1]);
    assert(if2.branches[2] == case2[2]);
    assert(if2.endif == case2[3]);


    // Two sibling blocks separated by unconditional directives.
    auto case3 = directives(`
    #if 1

    #elif defined FOO

    #else

    #endif

    #define BAR
    #undef BAR
    #define BAZ

    #if 0

    #else

    #endif

    #define FUN(x, y) x + y`);

    assert(case3.length == 11);

    auto if3_0 = cast(ConditionalDirective) case3[0];
    auto if3_1 = cast(ConditionalDirective) case3[7];

    assert(if3_0);
    assert(if3_1);

    assert(if3_0.branches.length == 3);
    assert(if3_1.branches.length == 2);

    assert(if3_0.branches[0] == case3[0]);
    assert(if3_0.branches[1] == case3[1]);
    assert(if3_0.branches[2] == case3[2]);
    assert(if3_0.endif == case3[3]);

    assert(if3_1.branches[0] == case3[7]);
    assert(if3_1.branches[1] == case3[8]);
    assert(if3_1.endif == case3[9]);


    // A nested block: its directives are not branches of the outer block.
    auto case4 = directives(`
    #if 1

    #elif defined FOO

        #ifdef BAR

        #else

        #endif

    #else

    #endif`);


    assert(case4.length == 7);

    auto if4_0 = cast(ConditionalDirective) case4[0];
    auto if4_1 = cast(ConditionalDirective) case4[2];

    assert(if4_0.branches[0] == case4[0]);
    assert(if4_0.branches[1] == case4[1]);
    assert(if4_0.branches[2] == case4[5]);
    assert(if4_0.endif == case4[6]);

    assert(if4_1.branches[0] == case4[2]);
    assert(if4_1.branches[1] == case4[3]);
    assert(if4_1.endif == case4[4]);
}
979 
// A case with space between directive and comment.
unittest
{
    // Only the three directives are yielded; the comments and the
    // declaration between them produce no groups.
    auto case1 = tokenizedDirectives(`
    /* Header comment. */

    #ifndef __FOO
    #define __FOO

    /* Comment before variable. */
    int variable;

    #endif`).array;

    assert(case1.length == 3);
}
996 
// A case with comment after directive.
unittest
{
    auto case0 = tokenizedDirectives(`
    #ifndef FOO /* Comment. */

    #endif`).array;

    assert(case0.length == 2);

    // The trailing comment adds no token: `#`, `ifndef` and `FOO` only.
    assert(case0[0].length == 3);
}