xref: /aosp_15_r20/external/clang/utils/TableGen/ClangCommentHTMLNamedCharacterReferenceEmitter.cpp (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li //===--- ClangCommentHTMLNamedCharacterReferenceEmitter.cpp -----------------=//
2*67e74705SXin Li //
3*67e74705SXin Li //                     The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li //
10*67e74705SXin Li // This tablegen backend emits an efficient function to translate HTML named
11*67e74705SXin Li // character references to UTF-8 sequences.
12*67e74705SXin Li //
13*67e74705SXin Li //===----------------------------------------------------------------------===//
14*67e74705SXin Li 
15*67e74705SXin Li #include "llvm/ADT/SmallString.h"
16*67e74705SXin Li #include "llvm/Support/ConvertUTF.h"
17*67e74705SXin Li #include "llvm/TableGen/Error.h"
18*67e74705SXin Li #include "llvm/TableGen/Record.h"
19*67e74705SXin Li #include "llvm/TableGen/StringMatcher.h"
20*67e74705SXin Li #include "llvm/TableGen/TableGenBackend.h"
21*67e74705SXin Li #include <vector>
22*67e74705SXin Li 
23*67e74705SXin Li using namespace llvm;
24*67e74705SXin Li 
25*67e74705SXin Li /// \brief Convert a code point to the corresponding UTF-8 sequence represented
26*67e74705SXin Li /// as a C string literal.
27*67e74705SXin Li ///
28*67e74705SXin Li /// \returns true on success.
translateCodePointToUTF8(unsigned CodePoint,SmallVectorImpl<char> & CLiteral)29*67e74705SXin Li static bool translateCodePointToUTF8(unsigned CodePoint,
30*67e74705SXin Li                                      SmallVectorImpl<char> &CLiteral) {
31*67e74705SXin Li   char Translated[UNI_MAX_UTF8_BYTES_PER_CODE_POINT];
32*67e74705SXin Li   char *TranslatedPtr = Translated;
33*67e74705SXin Li   if (!ConvertCodePointToUTF8(CodePoint, TranslatedPtr))
34*67e74705SXin Li     return false;
35*67e74705SXin Li 
36*67e74705SXin Li   StringRef UTF8(Translated, TranslatedPtr - Translated);
37*67e74705SXin Li 
38*67e74705SXin Li   raw_svector_ostream OS(CLiteral);
39*67e74705SXin Li   OS << "\"";
40*67e74705SXin Li   for (size_t i = 0, e = UTF8.size(); i != e; ++i) {
41*67e74705SXin Li     OS << "\\x";
42*67e74705SXin Li     OS.write_hex(static_cast<unsigned char>(UTF8[i]));
43*67e74705SXin Li   }
44*67e74705SXin Li   OS << "\"";
45*67e74705SXin Li 
46*67e74705SXin Li   return true;
47*67e74705SXin Li }
48*67e74705SXin Li 
49*67e74705SXin Li namespace clang {
EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper & Records,raw_ostream & OS)50*67e74705SXin Li void EmitClangCommentHTMLNamedCharacterReferences(RecordKeeper &Records,
51*67e74705SXin Li                                                   raw_ostream &OS) {
52*67e74705SXin Li   std::vector<Record *> Tags = Records.getAllDerivedDefinitions("NCR");
53*67e74705SXin Li   std::vector<StringMatcher::StringPair> NameToUTF8;
54*67e74705SXin Li   SmallString<32> CLiteral;
55*67e74705SXin Li   for (std::vector<Record *>::iterator I = Tags.begin(), E = Tags.end();
56*67e74705SXin Li        I != E; ++I) {
57*67e74705SXin Li     Record &Tag = **I;
58*67e74705SXin Li     std::string Spelling = Tag.getValueAsString("Spelling");
59*67e74705SXin Li     uint64_t CodePoint = Tag.getValueAsInt("CodePoint");
60*67e74705SXin Li     CLiteral.clear();
61*67e74705SXin Li     CLiteral.append("return ");
62*67e74705SXin Li     if (!translateCodePointToUTF8(CodePoint, CLiteral)) {
63*67e74705SXin Li       SrcMgr.PrintMessage(Tag.getLoc().front(),
64*67e74705SXin Li                           SourceMgr::DK_Error,
65*67e74705SXin Li                           Twine("invalid code point"));
66*67e74705SXin Li       continue;
67*67e74705SXin Li     }
68*67e74705SXin Li     CLiteral.append(";");
69*67e74705SXin Li 
70*67e74705SXin Li     StringMatcher::StringPair Match(Spelling, CLiteral.str());
71*67e74705SXin Li     NameToUTF8.push_back(Match);
72*67e74705SXin Li   }
73*67e74705SXin Li 
74*67e74705SXin Li   emitSourceFileHeader("HTML named character reference to UTF-8 "
75*67e74705SXin Li                        "translation", OS);
76*67e74705SXin Li 
77*67e74705SXin Li   OS << "StringRef translateHTMLNamedCharacterReferenceToUTF8(\n"
78*67e74705SXin Li         "                                             StringRef Name) {\n";
79*67e74705SXin Li   StringMatcher("Name", NameToUTF8, OS).Emit();
80*67e74705SXin Li   OS << "  return StringRef();\n"
81*67e74705SXin Li      << "}\n\n";
82*67e74705SXin Li }
83*67e74705SXin Li 
84*67e74705SXin Li } // end namespace clang
85*67e74705SXin Li 
86