xref: /MusicPlayer2/MusicPlayer2/Lyric.cpp (revision e51d77514999ce70db10740029cbbfec98e7c854)
1 #include "stdafx.h"
2 #include "Lyric.h"
3 #include "FilePathHelper.h"
4 #include "MusicPlayer2.h"
5 
// Lyric file extensions (no leading dot) that FileIsLyric() compares a file's extension against.
6 const vector<wstring> CLyrics::m_surpported_lyric{ L"lrc", L"ksc", L"vtt" };
7 
CLyrics(const wstring & file_name,const LyricType & lyric_type)8 CLyrics::CLyrics(const wstring& file_name, const LyricType& lyric_type) : m_file{ file_name }, m_lyric_type{ lyric_type }
9 {
10     // 当未指定歌词类型时按扩展名处理
11     if (m_lyric_type == LyricType::LY_AUTO)
12     {
13         CFilePathHelper path{ m_file };
14         wstring ext{ path.GetFileExtension() };
15         if (ext == L"lrc")
16             m_lyric_type = LyricType::LY_LRC;
17         else if (ext == L"ksc")
18             m_lyric_type = LyricType::LY_KSC;
19         else if (ext == L"vtt")
20             m_lyric_type = LyricType::LY_VTT;
21         else
22             m_lyric_type = LyricType::LY_LRC;   // 无匹配时默认为lrc
23     }
24 
25     string lyric_str;
26     if (!CCommon::GetFileContent(m_file.c_str(), lyric_str)) return;            // 读取歌词文件内容
27     m_code_type = CCommon::JudgeCodeType(lyric_str, m_code_type, true);         // 判断编码格式
28     wstring lyric_wcs = CCommon::StrToUnicode(lyric_str, m_code_type, true);    // 转换成Unicode
29 
30     LyricsFromRowString(lyric_wcs, m_lyric_type);
31 }
32 
LyricsFromRowString(const wstring & lyric_str,const LyricType & lyric_type)33 void CLyrics::LyricsFromRowString(const wstring& lyric_str, const LyricType& lyric_type)
34 {
35     ASSERT(lyric_type != LyricType::LY_AUTO);
36 
37     CCommon::StringSplitLine(lyric_str, m_lyrics_str, false, true);
38 
39     m_lyric_type = lyric_type;
40     // 按歌词类型调用解析方法
41     switch (m_lyric_type)
42     {
43     case CLyrics::LyricType::LY_LRC_NETEASE:
44         DisposeLrcNetease();
45         break;
46     case CLyrics::LyricType::LY_KSC:
47         DisposeKsc();
48         break;
49     case CLyrics::LyricType::LY_VTT:
50         m_code_type = CodeType::UTF8;   // WebVTT格式只能是UTF8内码,BOM是可选的
51         DisposeWebVTT();
52         break;
53     default:
54         DisposeLrc();
55         break;
56     }
57     // 解析完成后规范歌词
58     NormalizeLyric();
59 }
60 
FileIsLyric(const wstring & file_name)61 bool CLyrics::FileIsLyric(const wstring& file_name)
62 {
63     CFilePathHelper file_path(file_name);
64     wstring extension{ file_path.GetFileExtension() };		// 获取文件扩展名
65     for (const auto& ext : CLyrics::m_surpported_lyric)		// 判断文件扩展是否在支持的扩展名列表里
66     {
67         if (ext == extension)
68             return true;
69     }
70     return false;
71 }
72 
ParseLyricTimeTag(const wstring & lyric_text,Time & time,int & pos_start,int & pos_end,wchar_t bracket_left,wchar_t bracket_right)73 bool CLyrics::ParseLyricTimeTag(const wstring& lyric_text, Time& time, int& pos_start, int& pos_end, wchar_t bracket_left, wchar_t bracket_right)
74 {
75     int index = pos_end - 1;
76     bool time_acquired{ false };
77     while (!time_acquired)
78     {
79         index = lyric_text.find_first_of(bracket_left, index + 1);                 // 查找第1个左中括号
80         if (index == string::npos) break;                                  // 没有找到左中括号,退出循环
81         else if (index > static_cast<int>(lyric_text.size() - 9)) break;   // 找到了左中括号,但是左中括号在字符串的倒数第9个字符以后,也退出循环
82         else if ((lyric_text[index + 1] > L'9' || lyric_text[index + 1] < L'0') && lyric_text[index + 1] != L'-') continue;     // 找到了左中括号,但是左中括号后面不是数字也不是负号,退出本次循环,继续查找该行中下一个左中括号
83 
84         int index1, index2, index3;                                         // 歌词标签中冒号、圆点和右中括号的位置
85         index1 = lyric_text.find_first_of(L':', index);                // 查找从左中括号开始第1个冒号的位置
86         index2 = lyric_text.find_first_of(L".:", index1 + 1);          // 查找从第1个冒号开始第1个圆点或冒号的位置(秒钟和毫秒数应该用圆点分隔,这里也兼容用冒号分隔的歌词)
87         index3 = lyric_text.find_first_of(bracket_right, index2 + 1);                // 查找右中括号的位置
88         if (index1 == string::npos || index2 == string::npos || index3 == string::npos) continue;
89         wstring temp = lyric_text.substr(index + 1, index1 - index - 1);       // 获取时间标签的分钟数
90         time.min = _wtoi(temp.c_str());
91         temp = lyric_text.substr(index1 + 1, index2 - index1 - 1);     // 获取时间标签的秒钟数
92         time.sec = _wtoi(temp.c_str());
93         temp = lyric_text.substr(index2 + 1, index3 - index2 - 1);     // 获取时间标签的毫秒数
94         int char_cnt = temp.size();                                         // 毫秒数的位数
95         if (char_cnt > 0 && temp[0] == L'-')                                // 如果毫秒数的前面有负号,则位数减1
96             char_cnt--;
97         switch (char_cnt)
98         {
99         case 0:
100             time.msec = 0;
101         case 1:
102             time.msec = _wtoi(temp.c_str()) * 100;
103             break;
104         case 2:
105             time.msec = _wtoi(temp.c_str()) * 10;
106             break;
107         default:
108             time.msec = _wtoi(temp.c_str()) % 1000;
109             break;
110         }
111         time_acquired = true;
112         pos_start = index;
113         pos_end = index3 + 1;
114     }
115     return time_acquired;
116 }
117 
// Parses the lines in m_lyrics_str as LRC text.
// For each line that contains a ']' it picks up the first occurrence of the [id:], [ti:], [ar:],
// [al:], [by:] and [offset:] tags, then looks for time tags. A line with a time tag is split into
// its leading time-tag part and its text part; " / " in the text separates the translation, and
// time tags inside the text produce per-word split/word_time data. One Lyric is appended to
// m_lyrics for every time tag found in the leading part. CombineSameTimeLyric() runs at the end.
DisposeLrc()118 void CLyrics::DisposeLrc()
119 {
120     int index,index2;
121     m_translate = false;
122     for (const auto& str : m_lyrics_str)
123     {
124         Lyric lyric;
125         index = str.find(L"[");
126         index2 = str.find(L']', index);
127         if (index2 == wstring::npos) continue;  // skip lines that have no right square bracket
128         // Look for the id tag (it is added by this program itself, so it only ever appears on the first line)
129         if (!m_id_tag)
130         {
131             index = str.find(L"[id:");
132             if (index != string::npos)
133             {
134                 m_id_tag = true;
135                 m_id = str.substr(index + 4, index2 - index - 4);
136             }
137         }
138         // Look for the ti tag
139         if (!m_ti_tag)
140         {
141             index = str.find(L"[ti:");
142             if (index != string::npos)
143             {
144                 m_ti_tag = true;
145                 m_ti = str.substr(index + 4, index2 - index - 4);
146             }
147         }
148         // Look for the ar tag
149         if (!m_ar_tag)
150         {
151             index = str.find(L"[ar:");
152             if (index != string::npos)
153             {
154                 m_ar_tag = true;
155                 m_ar = str.substr(index + 4, index2 - index - 4);
156             }
157         }
158         // Look for the al tag
159         if (!m_al_tag)
160         {
161             index = str.find(L"[al:");
162             if (index != string::npos)
163             {
164                 m_al_tag = true;
165                 m_al = str.substr(index + 4, index2 - index - 4);
166             }
167         }
168         // Look for the by tag
169         if (!m_by_tag)
170         {
171             index = str.find(L"[by:");
172             if (index != string::npos)
173             {
174                 m_by_tag = true;
175                 m_by = str.substr(index + 4, index2 - index - 4);
176             }
177         }
178         // Read the offset
179         if (!m_offset_tag)
180         {
181             index = str.find(L"[offset:");      // look for the offset tag
182             if (index != string::npos)
183             {
184                 m_offset_tag = true;
185                 m_offset = _wtoi(str.substr(index + 8, index2 - index - 8).c_str());             // read the offset value
186             }
187         }
188 
189         Time t{};
190         int pos_start{}, pos_end{};
191         wchar_t bracket_left{ L'[' }, bracket_right{ L']' };
192         if (ParseLyricTimeTag(str, t, pos_start, pos_end, L'<', L'>'))
193         {
194             // If an angle-bracket time tag is present, parse as ESLyric 0.5.x and discard the leading [] time tag
195             bracket_left = L'<';
196             bracket_right = L'>';
197         }
198         t.fromInt(0);   // reset the search state
199         pos_start = pos_end = 0;
200         if (ParseLyricTimeTag(str, t, pos_start, pos_end, bracket_left, bracket_right))      // find the first time tag; only lines with a time tag count as lyrics (enhanced lrc needs the first time tag to turn word tags into word_time)
201         {
202             // Parse the lyric line
203             wstring time_str, text_str;
204             index = str.find_first_not_of(L"[]<>:.0123456789-", pos_end); // used to separate the time tags at the start of the line (possibly several consecutive compressed time tags)
205             index = str.rfind(bracket_right, index) + 1;                         // avoid cutting into digits at the beginning of the lyric text
206             if (index != wstring::npos)
207             {
208                 time_str = str.substr(0, index);
209                 text_str = str.substr(index);
210             }
211             else
212                 time_str = str;
213             CCommon::StringNormalize(text_str);
214             if (!text_str.empty())                              // the else of this if is a blank line, for which text and translate need no handling
215             {
216                 index = text_str.find(L" / ");                  // extract the translation
217                 if (index != wstring::npos)                     // " / " was found, so this lyric line contains a translation
218                 {
219                     lyric.translate = text_str.substr(index + 3);
220                     text_str = text_str.substr(0, index);
221                     m_translate = true; // because of the earlier StringNormalize the part after " / " cannot be empty, so the translation need not be re-checked for emptiness
222                 }
223                 index = index2 = 0;
224                 Time time_w, time_w_;
225                 if (ParseLyricTimeTag(text_str, time_w_, index, index2, bracket_left, bracket_right))    // a time tag inside the lyric text means this is enhanced lrc
226                 {
227                     lyric.text = text_str.substr(0, index);
228                     lyric.split.push_back(lyric.text.size());
229                     lyric.word_time.push_back(time_w_ - t);
230                     int last_pos_end = index2;
231                     while (ParseLyricTimeTag(text_str, time_w, index, index2, bracket_left, bracket_right))
232                     {
233                         lyric.text += text_str.substr(last_pos_end, index - last_pos_end);
234                         lyric.split.push_back(lyric.text.size());
235                         lyric.word_time.push_back(time_w - time_w_);
236                         last_pos_end = index2;
237                         time_w_ = time_w;
238                     }
239                     if (index2 < static_cast<int>(text_str.size()))     // if the last time tag is not at the end, add the rest of text_str as one constant-speed segment without an explicit duration
240                     {
241                         lyric.text += text_str.substr(index2);
242                         lyric.split.push_back(lyric.text.size());
243                         lyric.word_time.push_back(-1);
244                     }
245                 }
246                 else
247                     lyric.text = text_str;
248             }
249             do
250             {
251                 lyric.time_start_raw = t.toInt();
252                 m_lyrics.push_back(lyric);
253             } while (ParseLyricTimeTag(time_str, t, pos_start, pos_end, L'[', L']'));   // compressed lrc is expanded here (compressed lrc can only use square brackets)
254         }
255     }
256     // The CombineSameTimeLyric() call should be moved into DisposeLrcNetease(), but that would affect compatibility with lyrics downloaded earlier. Although the method now accepts a parameter, no default merge tolerance should be set here.
257     // Reason: when a negative offset makes lyrics pile up at time 0 while adjusting the offset, the existing code stores them 10ms apart, so a parameter of 10 or more here would merge them by mistake.
258     CombineSameTimeLyric();
259 }
260 
DisposeLrcNetease()261 void CLyrics::DisposeLrcNetease()
262 {
263     for (size_t i{}; i < m_lyrics_str.size(); ++i)
264     {
265         // 仅处理网易歌词原文,这行同时保证[i + 1]不会越界
266         if (m_lyrics_str[i].find(L"},\"klyric\":{") != wstring::npos) break;
267         // 歌词行间缺少\n(单行多段时间标签)时按时间标签主动分割
268         // 将 "[xx:xx.xx]?????#[xx:xx.xx]?????#" 于第一个#后拆分,同时不处理压缩lrc "[xx:xx.xx][xx:xx.xx][xx:xx.xx]?????#"
269         size_t index{ m_lyrics_str[i].find_first_not_of(L"[:.]0123456789-") };   // 分离行起始时间标签(可能是连续的压缩时间标签)
270         size_t index1{ m_lyrics_str[i].find(L"[", index) };                 // 网易的歌词非扩展lrc,如果出现第二段时间标签则放入下一行
271         if (index1 != wstring::npos)
272         {
273             // 判断是否为时间标签,避免误分割
274             size_t index2{ m_lyrics_str[i].find_first_not_of(L"[:.]0123456789-", index1) };
275             if (index2 != wstring::npos && index2 != index1 + 1)
276             {
277                 m_lyrics_str.emplace(m_lyrics_str.begin() + i, m_lyrics_str[i].substr(0, index1));
278                 m_lyrics_str[i + 1] = m_lyrics_str[i + 1].substr(index1);
279             }
280         }
281         // 有重复时间标签的(第一个内容空白,歌词在第二个里),此处将其内容合并。
282         index = m_lyrics_str[i].rfind(L"]", index) + 1;                     // 避免截取到歌词开头的数字,同时也避开非时间标签的[id:xxx],[ti:xxx]等
283         if (index == wstring::npos || index == 0) continue;
284         if (!m_lyrics_str[i].compare(0,index,m_lyrics_str[i + 1],0,index))
285         {
286             m_lyrics_str[i] += m_lyrics_str[i + 1].substr(index);
287             m_lyrics_str.erase(m_lyrics_str.begin() + i + 1);
288         }
289     }
290     DisposeLrc();
291     DeleteRedundantLyric();
292 }
293 
// Parses the lines in m_lyrics_str as KSC text and rebuilds m_lyrics from them.
// Everything from "//" to the end of a line is dropped. The first karaoke.songname line supplies
// m_ti and the first karaoke.singer line supplies m_ar (the text between the first two single
// quotes). Each karaoke.add line yields one Lyric: two time tags delimited by single quotes give
// the start time and the line duration, the next quoted string is the lyric text (with '[' and ']'
// grouping characters into one split segment), and the quoted string after it is a comma-separated
// list of per-segment durations stored in word_time. Lines that fail any of these steps are skipped.
DisposeKsc()294 void CLyrics::DisposeKsc()
295 {
296     m_translate = false;
297     m_lyrics.clear();
298     for (int i{}; i < static_cast<int>(m_lyrics_str.size()); ++i)
299     {
300         int index, index2;
301         Lyric lyric;
302         Time time;
303         index = m_lyrics_str[i].find(L"//");        // strip the comment
304         wstring str = m_lyrics_str[i].substr(0, index);
305         CCommon::StringNormalize(str);
306 
307         if (!m_ti_tag && str.find(L"karaoke.songname") != wstring::npos)
308         {
309             index = str.find(L"\'");
310             index2 = str.find(L"\'", index + 1);
311             if (index == wstring::npos || index2 == wstring::npos || index2 - index < 1) continue;
312             m_ti = str.substr(index + 1, index2 - index - 1);
313             m_ti_tag = true;
314         }
315         if (!m_ar_tag && str.find(L"karaoke.singer") != wstring::npos)
316         {
317             index = str.find(L"\'");
318             index2 = str.find(L"\'", index + 1);
319             if (index == wstring::npos || index2 == wstring::npos || index2 - index < 1) continue;
320             m_ar = str.substr(index + 1, index2 - index - 1);
321             m_ar_tag = true;
322         }
323 
324         // Parse lyric lines that contain karaoke.add
325         if (str.find(L"karaoke.add") != wstring::npos)
326         {
327             index = index2 = 0;
328             if (!ParseLyricTimeTag(str, time, index, index2, L'\'', L'\'')) continue;
329             lyric.time_start_raw = time.toInt();
330             if (!ParseLyricTimeTag(str, time, index, index2, L'\'', L'\'')) continue;
331             lyric.time_span_raw = time.toInt() - lyric.time_start_raw;
332             index = str.find(L"\'", index2);                    // find the single quote that opens the lyric
333             index2 = str.rfind(L"]");                           // find the last "]" of the lyric
334             if (index2 == wstring::npos || index2 <= index)     // the lyric contains no "]"
335                 index2 = index + 1;                             // so search for the closing quote starting right after the opening quote
336             index2 = str.find(L"\'", index2);                   // find the single quote that closes the lyric
337             if (index == wstring::npos || index2 == wstring::npos || index2 - index < 1) continue;
338             wstring lyric_raw{ str.substr(index + 1, index2 - index - 1) };
339             CCommon::StringReplace(lyric_raw, L"\'\'", L"\'");  // undo the single-quote escaping inside the lyric
340             if (lyric_raw.find_first_of(L"[]") != wstring::npos)
341             {
342                 bool flag{};    // true while inside [], where no split point is added per character
                // NOTE(review): this loop variable shadows the outer line index i.
343                 for (size_t i{}; i < lyric_raw.size(); ++i)
344                 {
345                     if (lyric_raw[i] == L'[')
346                     {
347                         if (i != 0)
348                             lyric.split.push_back(lyric.text.size());
349                         flag = true;
350                     }
351                     else if (lyric_raw[i] == L']')
352                     {
353                         lyric.split.push_back(lyric.text.size());
354                         flag = false;
355                     }
356                     else
357                     {
358                         lyric.text += lyric_raw[i];
359                         if(!flag)
360                             lyric.split.push_back(lyric.text.size());
361                     }
362                 }
                // Drop consecutive duplicate split points produced by adjacent brackets.
363                 auto new_end = std::unique(lyric.split.begin(), lyric.split.end());
364                 lyric.split.erase(new_end, lyric.split.end());
365             }
366             else
367             {
                // No brackets: every character is its own segment.
368                 lyric.text = lyric_raw;
369                 for (size_t i{ 1 }; i <= lyric.text.size(); ++i)
370                 {
371                     lyric.split.push_back(i);
372                 }
373             }
            // The next quoted string holds the comma-separated segment durations.
374             index = str.find(L"\'", index2 + 1);
375             index2 = str.find(L"\'", index + 1);
376             if (index == wstring::npos || index2 == wstring::npos || index2 - index < 1) continue;
377             wstring time_raw{ str.substr(index + 1, index2 - index - 1) };
378             vector<wstring> time_raw_s;
379             CCommon::StringSplit(time_raw, L',', time_raw_s);
380             for (auto& s : time_raw_s)
381             {
382                 lyric.word_time.push_back(_wtoi(s.c_str()));
383             }
384 
385             m_lyrics.push_back(lyric);
386         }
387     }
388 }
389 
// Parses the lines in m_lyrics_str as WebVTT text and rebuilds m_lyrics from them.
// A line containing "-->" with two readable timestamps starts a cue and appends a Lyric holding
// the start time and the duration. The following non-blank lines are joined with spaces; at the
// next blank line the joined text is scanned: text outside <...> is unescaped and appended to the
// lyric text, and a <...> tag that holds a timestamp adds a split point and a word duration.
DisposeWebVTT()390 void CLyrics::DisposeWebVTT()
391 {
    // Reads one timestamp from str starting at pos. Only runs of the characters "0123456789:."
    // that are exactly 12 (hh:mm:ss.ttt) or 9 (mm:ss.ttt) characters long are accepted.
    // On success time receives the value in milliseconds and pos is moved past the timestamp.
392     auto getTime = [](const wstring& str, int& pos, int& time) -> bool
393         {
394             const wchar_t* time_char = L"0123456789:.";
395             int pos_start = str.find_first_of(time_char, pos);
396             if (pos_start == wstring::npos) return false;
397             int pos_end = str.find_first_not_of(time_char, pos_start);
398             if (pos_end == wstring::npos)
399                 pos_end = str.size();
400             if (pos_end - pos_start != 12 && pos_end - pos_start != 9)
401                 return false;
402             int hh{}, mm{}, ss{}, ttt{};
403             if (pos_end - pos_start == 12)
404             {
405                 hh = _wtoi(str.substr(pos_start, 2).c_str());
406                 pos_start += 3;
407             }
408             mm = _wtoi(str.substr(pos_start, 2).c_str());
409             pos_start += 3;
410             ss = _wtoi(str.substr(pos_start, 2).c_str());
411             pos_start += 3;
412             ttt = _wtoi(str.substr(pos_start, 3).c_str());
413             time = (((hh * 60) + mm) * 60 + ss) * 1000 + ttt;
414             pos = pos_end;
415             return true;
416         };
417 
418     // Unescapes entity references (not a complete implementation)
419     auto unEscapeStr = [](wstring& str)
420         {
421             if (str.empty() || str.find(L'&') == wstring::npos)
422                 return;
423             CCommon::StringReplace(str, L"&amp;", L"&");
424             CCommon::StringReplace(str, L"&lt;", L"<");
425             CCommon::StringReplace(str, L"&gt;", L">");
426             CCommon::StringReplace(str, L"&quot;", L"\"");
427             CCommon::StringReplace(str, L"&apos;", L"\'");
428             CCommon::StringReplace(str, L"&nbsp;", L" ");
429         };
430 
431     m_lyrics.clear();
    // Make sure the input ends with a blank line so that the last cue gets terminated.
432     if (m_lyrics_str.empty() || !m_lyrics_str.back().empty())
433         m_lyrics_str.push_back(wstring());
434     bool find_next = true;
435     wstring text_with_tag;
436     for (const wstring& line_str : m_lyrics_str)
437     {
438         if (find_next)                      // first find and process a line containing -->; find_next is set to false to mark that a cue has been entered
439         {
440             if (line_str.find(L"-->") == wstring::npos)
441                 continue;
442             int pos{};
443             Lyric lyric{};
444             if (!getTime(line_str, pos, lyric.time_start_raw))
445                 continue;
446             if (!getTime(line_str, pos, lyric.time_span_raw))
447                 continue;
            // The second timestamp is the end time; turn it into a duration.
448             lyric.time_span_raw -= lyric.time_start_raw;
449             m_lyrics.push_back(std::move(lyric));
450             find_next = false;
451         }
452         else if (!line_str.empty())         // inside a cue, until a blank line is met, append all text to text_with_tag, inserting spaces to join it into one line
453         {
454             text_with_tag += line_str + L' ';
455         }
456         else if (!text_with_tag.empty())    // a blank line marks the end of a cue: process text_with_tag and set find_next to true to look for the next cue
457         {
            // Remove the trailing space added after the last line.
458             text_with_tag.pop_back();
459             Lyric& lyric = m_lyrics.back();
460             int time_last = lyric.time_start_raw;
461             bool in_tag = false;            // the spec makes escaping ">" optional (though recommended), so a stand-alone greater-than sign is ordinary text
462             const wchar_t* start_pos = text_with_tag.data();
463             const wchar_t* cur_pos = start_pos;
464             const wchar_t* end_pos = start_pos + text_with_tag.size();
465             while (cur_pos < end_pos)
466             {
467                 if (*cur_pos == L'<')
468                 {
                    // Flush the plain text collected before the tag.
469                     wstring tmp(start_pos, cur_pos);
470                     unEscapeStr(tmp);
471                     lyric.text += tmp;
472                     start_pos = cur_pos + 1;
473                     in_tag = true;
474                 }
475                 else if (*cur_pos == L'>' && in_tag)
476                 {
                    // End of a tag: if its content holds a timestamp, record a split point.
477                     wstring tmp(start_pos, cur_pos);
478                     int pos = 0, time = 0;
479                     if (getTime(tmp, pos, time))
480                     {
481                         lyric.split.push_back(lyric.text.size());
482                         lyric.word_time.push_back(time - time_last);
483                         time_last = time;
484                     }
485                     start_pos = cur_pos + 1;
486                     in_tag = false;
487                 }
488                 ++cur_pos;
489             }
            // Flush whatever follows the last tag.
490             wstring tmp(start_pos, cur_pos);
491             unEscapeStr(tmp);
492             lyric.text += tmp;
493             if (!lyric.split.empty())
494             {
                // Close the last segment at the end of the cue.
495                 lyric.split.push_back(lyric.text.size());
496                 lyric.word_time.push_back(lyric.time_start_raw + lyric.time_span_raw - time_last);
497             }
498             text_with_tag.clear();
499             find_next = true;
500         }
501         else                                // a blank line met before text_with_tag has any content means this cue has no text; look for the next cue
502         {
503             find_next = true;
504         }
505     }
506 }
507 
NormalizeLyric()508 void CLyrics::NormalizeLyric()
509 {
510     if (m_lyrics.empty()) return;
511     std::stable_sort(m_lyrics.begin(), m_lyrics.end()); // 非必要,但为防止出错还是重新排序
512     int last{};
513     // 填充time_start,应用偏移量同时避免出现重叠,重叠的时间标签会被歌词翻译合并误处理
514     for (size_t i{}; i < m_lyrics.size(); ++i)
515     {
516         last = max(last, m_lyrics[i].time_start_raw + m_offset);
517         m_lyrics[i].time_start = last;
518         last += 10;
519     }
520     // 填充time_span
521     for (size_t i{}; i < m_lyrics.size() - 1; ++i)
522     {
523         auto& now{ m_lyrics[i] };
524         auto& next{ m_lyrics[i + 1] };
525         if (!now.word_time.empty() && now.word_time.back() < 0)     // 若是逐字歌词且没有显式提供最后匀速段的持续时长
526         {                                                           // 这里认为其持续到下一行歌词开始
527             now.word_time.back() = next.time_start - now.time_start - std::accumulate(now.word_time.begin(), now.word_time.end() - 1, 0);
528             if (now.word_time.back() < 0)
529                 now.word_time.back() = 0;
530         }
531         if (now.time_span_raw != 0)                                 // 原始歌词有行持续时间
532             now.time_span = now.time_span_raw;
533         else if (!now.word_time.empty())                            // 否则累加字时间作为行持续时间
534             now.time_span = std::accumulate(now.word_time.begin(), now.word_time.end(), 0);
535         // time_span为0说明不是逐字歌词,使用下一行开始时间作为本行结束时间; 对逐字歌词检查并阻止time_start + time_span超过下一句的time_start,防止出现时轴重叠
536         if (now.time_span == 0 || next.time_start - now.time_start < now.time_span)
537             now.time_span = next.time_start - now.time_start;
538     }
539     if (m_lyrics.size() > 0)    // 填充最后一句
540     {
541         Lyric& now = m_lyrics[m_lyrics.size() - 1];
542         if (!now.word_time.empty() && now.word_time.back() < 0)     // 若是逐字歌词且没有显式提供最后匀速段的持续时长
543         {
544             if (now.word_time.size() >= 2)
545                 now.word_time.back() = *(now.word_time.end() - 2);  // 使用前一个匀速段的持续时长
546             else
547                 now.word_time.back() = 20000;                       // 20秒
548             if (now.word_time.back() < 0)
549                 now.word_time.back() = 0;
550         }
551         if (now.time_span_raw != 0)
552             now.time_span = now.time_span_raw;
553         else if (!now.word_time.empty())
554             now.time_span = std::accumulate(now.word_time.begin(), now.word_time.end(), 0);
555     }
556 }
557 
IsEmpty() const558 bool CLyrics::IsEmpty() const
559 {
560     return (m_lyrics.size() == 0);
561 }
562 
// Returns the lyric format stored in m_lyric_type.
GetLyricType() const563 CLyrics::LyricType CLyrics::GetLyricType() const
564 {
565     return m_lyric_type;
566 }
567 
GetLyricIndex(Time time) const568 int CLyrics::GetLyricIndex(Time time) const
569 {
570     if (m_lyrics.empty() || time.toInt() < m_lyrics[0].time_start)
571         return -1;
572     for (int i{ 0 }; i < static_cast<int>(m_lyrics.size()); i++)
573     {
574         if (time.toInt() < m_lyrics[i].time_start + m_lyrics[i].time_span)
575             return i;
576     }
577     return m_lyrics.size() - 1;
578 }
579 
GetLyric(int index) const580 CLyrics::Lyric CLyrics::GetLyric(int index) const
581 {
582     if (index >= 0 && index < static_cast<int>(m_lyrics.size()))
583         return m_lyrics[index];
584     if (index != -1)
585         return Lyric();
586     Lyric ti{};
587     ti.text = m_ti;
588     return ti;
589 }
590 
GetLyricIndexIgnoreBlank(int index,bool is_next) const591 int CLyrics::GetLyricIndexIgnoreBlank(int index, bool is_next) const
592 {
593     if (m_lyrics.empty()) return 0;
594     // 对齐到非空白歌词
595     if (index < 0 && m_ti.empty())
596         index = 0;
597     if (index >= 0)
598     {
599         for (int i{ index }; i < static_cast<int>(m_lyrics.size()); ++i)
600         {
601             if (!m_lyrics[i].text.empty())
602             {
603                 index = i;
604                 break;
605             }
606         }
607         if (m_lyrics[index].text.empty())       // 若向后查询失败则向前回溯,解决以多个空行结尾的歌词的显示问题
608         {
609             for (int i{ index }; i >= 0; --i)
610             {
611                 if (!m_lyrics[i].text.empty())
612                 {
613                     index = i;
614                     break;
615                 }
616             }
617         }
618     }
619     if (is_next && index >= -1)
620     {
621         index++;        // 当is_next为true时即使超出m_lyrics下标范围仍要加一
622         for (int i{ index }; i < static_cast<int>(m_lyrics.size()); ++i)
623         {
624             if (!m_lyrics[i].text.empty())
625             {
626                 index = i;
627                 break;
628             }
629         }
630     }
631     return index;
632 }
633 
GetBlankTimeBeforeLyric(int index) const634 int CLyrics::GetBlankTimeBeforeLyric(int index) const
635 {
636     if (index < 0 || index >= static_cast<int>(m_lyrics.size()) || m_lyrics[index].text.empty())
637         return 0;
638     int index_blank{ index };
639     for (int i{ index - 1 }; i >= 0; --i)
640     {
641         if (m_lyrics[i].text.empty())
642             index_blank = i;
643         else
644             break;
645     }
646     if (index_blank == 0)       // index之前的歌词全部为空行或index为第一行歌词
647          return m_lyrics[index].time_start - (m_ti.empty() ? 0 : m_lyrics[index_blank].time_start);   // 标题为空时将原本的标题时间计入空行
648     return m_lyrics[index].time_start - m_lyrics[index_blank - 1].time_start - m_lyrics[index_blank - 1].time_span;
649 }
650 
GetLyric(Time time,bool is_next,bool ignore_blank,bool blank2mark) const651 CLyrics::Lyric CLyrics::GetLyric(Time time, bool is_next, bool ignore_blank, bool blank2mark) const
652 {
653     int now_index{ GetLyricIndex(time) };
654     if (!ignore_blank)                                          // 不忽略空行,返回原始歌词
655         return GetLyric(now_index + is_next);
656     now_index = GetLyricIndexIgnoreBlank(now_index, is_next);   // 索引对齐到非空当前歌词
657     int blank_time{ GetBlankTimeBeforeLyric(now_index) };
658     if (!blank2mark || blank_time < LYRIC_BLANK_IGNORE_TIME)    // 不添加进度符号
659         return GetLyric(now_index);
660     CLyrics::Lyric lyric = GetLyric(now_index);
661     const static wstring mark = theApp.m_str_table.LoadText(L"UI_LYRIC_MUSIC_SYMBOL") + L' ';
662     if(!lyric.text.empty())                                     // 只对非空歌词添加,应对以多行空行结尾的歌词
663         lyric.text = mark + lyric.text;
664     return lyric;
665 }
666 
// Returns the progress through the current lyric line at the given playback time, capped at 1000.
//   time          playback position.
//   ignore_blank  align the current line to a line with text and take the blank time before it into account.
//   blank2mark    treat a blank period of at least LYRIC_BLANK_IGNORE_TIME as a progress symbol placed before the text.
//   measure       callback that returns the width of a piece of text.
// Returns 0 when there are no lyrics, when the aligned index lies past the last line, or when
// the time lies before the line and no progress symbol is in use.
GetLyricProgress(Time time,bool ignore_blank,bool blank2mark,std::function<int (const wstring &)> measure) const667 int CLyrics::GetLyricProgress(Time time, bool ignore_blank, bool blank2mark, std::function<int(const wstring&)> measure) const
668 {
669     if (m_lyrics.empty()) return 0;
670     const static wstring mark = theApp.m_str_table.LoadText(L"UI_LYRIC_MUSIC_SYMBOL");
671 
672     int lyric_current_time{};       // time already elapsed in the current constant-speed segment
673     int lyric_last_time{};          // total duration of the current constant-speed segment
674     int lyric_before_size{};        // width of everything before the current segment
675     int lyric_word_size{};          // width of the current constant-speed segment
676     int lyric_line_size{};          // width of the whole line
677 
678     int now_index{ GetLyricIndex(time) };
679     int blank_time{};
680     if (ignore_blank)        // align to the current line that has text
681     {
682         now_index = GetLyricIndexIgnoreBlank(now_index, false);
683         blank_time = GetBlankTimeBeforeLyric(now_index);
684     }
685     bool without_mark{ !blank2mark || blank_time < LYRIC_BLANK_IGNORE_TIME };
686     // Currently in the title
687     if (now_index < 0)
688     {
689         lyric_current_time = time.toInt();
690         lyric_last_time = max(m_lyrics[0].time_start, 1);
691     }
692     else if (now_index >= static_cast<int>(m_lyrics.size()))
693     {
694         return 0;
695     }
696     // Currently in the progress symbol
697     else if (time < m_lyrics[now_index].time_start)
698     {
699         if (without_mark)       // no progress symbol involved, handle normally
700             return 0;
701         else
702         {
703             lyric_current_time = time - (m_lyrics[now_index].time_start - blank_time);
704             lyric_last_time = blank_time;
705             lyric_word_size = measure(mark);
706             lyric_line_size = measure(mark + L" " + m_lyrics[now_index].text);
707         }
708     }
709     // Currently in the lyric text
710     else
711     {
712         const Lyric& now_lyric{ m_lyrics[now_index] };
713         if (!without_mark)                  // for the text, the symbol's progress is obtained by adding the symbol width to the sizes in advance
714         {
715             lyric_before_size = measure(mark + L" ");
716             lyric_word_size = measure(now_lyric.text);
717             lyric_line_size = lyric_before_size + lyric_word_size;
718         }
719         lyric_current_time = time - now_lyric.time_start;
720         if (now_lyric.word_time.empty())    // check whether this is a word-by-word lyric
721         {
722             lyric_last_time = now_lyric.time_span;
723             if (lyric_last_time == 0)
724                 lyric_last_time = 20000;
725         }
726         else    // word_time is used only in GetLyricProgress; everything else should rely on the time_span produced by Normalize, which confines errors caused by non-standard lyrics to Progress
727         {
728             if (without_mark)               // no progress symbol + word-by-word lyric: lyric_line_size is filled in here
729                 lyric_line_size = measure(now_lyric.text);
730             size_t i{}, split_num{ min(now_lyric.split.size(), now_lyric.word_time.size()) };    // avoids an out-of-range index that non-standard raw lyrics could cause
            // Skip the segments that have already elapsed completely.
731             while (i < split_num && lyric_current_time > now_lyric.word_time[i])
732                 lyric_current_time -= now_lyric.word_time[i++];
733             if (i < split_num)
734             {
735                 lyric_last_time = now_lyric.word_time[i];
736                 if (i == 0)
737                     lyric_word_size = measure(now_lyric.text.substr(0, now_lyric.split[i]));
738                 else
739                 {
740                     lyric_before_size += measure(now_lyric.text.substr(0, now_lyric.split[i - 1]));
741                     lyric_word_size = measure(now_lyric.text.substr(now_lyric.split[i - 1], now_lyric.split[i] - now_lyric.split[i - 1]));
742                 }
743             }
744             else        // return 1000 when this is the last lyric line or non-standard lyrics make lyric_current_time exceed the sum of word_time
745                 return 1000;
746         }
747     }
    // Progress within the current segment; max() guards against a division by zero.
748     int progress{ lyric_current_time * 1000 / max(lyric_last_time, 1) };
    // When widths are known, convert the segment progress into progress across the whole line.
749     if (lyric_line_size > 0)
750         progress = (progress * lyric_word_size / 1000 + lyric_before_size) * 1000 / lyric_line_size;
751     // TRACE("b:%d\tw:%d\tl:%d\tp:%d\n", lyric_before_size, lyric_word_size, lyric_line_size, progress);
752     return min(progress, 1000);
753 }
754 
GetCodeType() const755 CodeType CLyrics::GetCodeType() const
756 {
757     return m_code_type;
758 }
759 
GetAllLyricText(bool with_translate) const760 wstring CLyrics::GetAllLyricText(bool with_translate) const
761 {
762     wstring all_lyric{};
763     for (auto& a_lyric : m_lyrics)
764     {
765         all_lyric += a_lyric.text;
766         all_lyric += L"\r\n";
767         if(with_translate && !a_lyric.translate.empty())
768         {
769             all_lyric += a_lyric.translate;
770             all_lyric += L"\r\n";
771         }
772     }
773     return all_lyric;
774 }
775 
GetLyricsString() const776 wstring CLyrics::GetLyricsString() const
777 {
778     wstring lyric_string{};
779     if (m_offset == 0)              // 如果时间偏移为0,则返回原始的歌词文本
780     {
781         for (auto& str : m_lyrics_str)
782         {
783             lyric_string += str;
784             lyric_string += L"\r\n";
785         }
786     }
787     else                            // 如果时间偏移不为0,返回将时间偏移写入每个时间标签后的歌词文本
788     {
789         lyric_string = GetLyricsString2();
790     }
791     if (lyric_string.size() > 1)
792     {
793         lyric_string.pop_back();    // 最后一行不需要加回车,删除末尾的\r\n
794         lyric_string.pop_back();
795     }
796     return lyric_string;
797 }
798 
GetLyricsString2(bool lyric_and_traslation_in_same_line,LyricType lyric_type) const799 wstring CLyrics::GetLyricsString2(bool lyric_and_traslation_in_same_line, LyricType lyric_type) const
800 {
801     std::wstringstream lyric_string;
802     if (lyric_type == LyricType::LY_AUTO)
803         lyric_type = m_lyric_type;
804     if (lyric_type == LyricType::LY_LRC || lyric_type == LyricType::LY_LRC_NETEASE)
805     {
806         if (m_id_tag) lyric_string << L"[id:" << m_id << L"]\r\n";
807         if (m_ti_tag) lyric_string << L"[ti:" << m_ti << L"]\r\n";
808         if (m_ar_tag) lyric_string << L"[ar:" << m_ar << L"]\r\n";
809         if (m_al_tag) lyric_string << L"[al:" << m_al << L"]\r\n";
810         if (m_by_tag) lyric_string << L"[by:" << m_by << L"]\r\n";
811         for (const auto& a_lyric : m_lyrics)
812         {
813             Time a_time{ a_lyric.time_start };
814             wstring line_str{ a_time.toLyricTimeTag()};
815             size_t split_num{ min(a_lyric.split.size(), a_lyric.word_time.size()) };    // 避免原始歌词不标准可能导致的索引越界
816             if (split_num > 0)  // 以扩展lrc形式存储逐字信息,舍弃行时长time_span
817             {
818                 for (size_t i{}; i < split_num; ++i)
819                 {
820                     if (i == 0)
821                         line_str += a_lyric.text.substr(0, a_lyric.split[i]);
822                     else
823                         line_str += a_lyric.text.substr(a_lyric.split[i - 1], a_lyric.split[i] - a_lyric.split[i - 1]);
824                     a_time += a_lyric.word_time[i];
825                     line_str += a_time.toLyricTimeTag();
826                 }
827             }
828             else
829             {
830                 line_str += a_lyric.text;
831             }
832             if (!a_lyric.translate.empty())
833             {
834                 //歌词和翻译在同一行,在" / "后面添加翻译
835                 if (lyric_and_traslation_in_same_line)
836                 {
837                     line_str += L" / ";
838                     line_str += a_lyric.translate;
839                 }
840                 //歌词和翻译在不同行,为翻译添加一行新的歌词
841                 else
842                 {
843                     line_str += L"\r\n";
844                     line_str += Time(a_lyric.time_start).toLyricTimeTag();
845                     line_str += a_lyric.translate;
846                 }
847             }
848             lyric_string << line_str << L"\r\n";
849         }
850     }
851     else if (lyric_type == LyricType::LY_KSC)
852     {
853         for (const wstring& str : m_lyrics_str) // 不清楚规则故暂不修改非歌词行
854         {
855             if (str.find(L"karaoke.add") != wstring::npos) break;
856             lyric_string << str << L"\r\n";
857         }
858         wchar_t time_buff[16];
859         for (const auto& a_lyric : m_lyrics)    // 重新构建歌词行
860         {
861             lyric_string << L"karaoke.add('";
862             Time a_time{ a_lyric.time_start };
863             swprintf_s(time_buff, L"%.2d:%.2d.%.3d", a_time.min, a_time.sec, a_time.msec);
864             lyric_string << time_buff;
865             lyric_string << L"', '";
866             a_time += a_lyric.time_span;
867             swprintf_s(time_buff, L"%.2d:%.2d.%.3d", a_time.min, a_time.sec, a_time.msec);
868             lyric_string << time_buff;
869             lyric_string << L"', '";
870             wstring text{};
871             for (size_t i{}; i < a_lyric.split.size(); ++i)
872             {
873                 wstring word;
874                 if (i == 0)
875                     word = a_lyric.text.substr(0, a_lyric.split[i]);
876                 else
877                     word = a_lyric.text.substr(a_lyric.split[i - 1], a_lyric.split[i] - a_lyric.split[i - 1]);
878                 if (word.size() == 1 && word[0] > 127)
879                     text += word;
880                 else
881                     text += L'[' + word + L']';
882             }
883             CCommon::StringReplace(text, L"\'", L"\'\'");   // 转义单引号
884             lyric_string << text;
885             lyric_string << L"', '";
886             for (size_t i{}; i < a_lyric.word_time.size(); ++i)
887             {
888                 lyric_string << std::to_wstring(a_lyric.word_time[i]);
889                 if (i != a_lyric.word_time.size() - 1)
890                     lyric_string << L",";
891             }
892             lyric_string << L"');\r\n";
893         }
894     }
895     else if (lyric_type == LyricType::LY_VTT)
896     {
897         // 更不完整的实体引用转义
898         auto escapeStr = [](wstring str) -> wstring
899             {
900                 CCommon::StringReplace(str, L"&", L"&amp;");
901                 CCommon::StringReplace(str, L"<", L"&lt;");
902                 CCommon::StringReplace(str, L">", L"&gt;");
903                 return str;
904             };
905 
906         int index{};
907         // 放弃未识别的部分,重新生成所有有效cue
908         lyric_string << L"WEBVTT\r\n";
909         for (const auto& a_lyric : m_lyrics)
910         {
911             lyric_string << L"\r\n" << ++index << L"\r\n";
912             lyric_string << Time(a_lyric.time_start).toVttTimeTag() << L" --> " << Time(a_lyric.time_start + a_lyric.time_span).toVttTimeTag() << L"\r\n";
913             if (a_lyric.split.empty())
914                 lyric_string << escapeStr(a_lyric.text) << L"\r\n";
915             else
916             {
917                 int start_time = a_lyric.time_start;
918                 int end_time = start_time + a_lyric.time_span;
919                 int pos_start = 0;
920                 size_t split_num{ min(a_lyric.split.size(), a_lyric.word_time.size()) };    // 避免原始歌词不标准可能导致的索引越界
921                 for (size_t i{}; i < split_num; ++i)
922                 {
923                     lyric_string << escapeStr(a_lyric.text.substr(pos_start, a_lyric.split[i] - pos_start));
924                     pos_start = a_lyric.split[i];
925                     start_time += a_lyric.word_time[i];
926                     if (start_time < end_time)             // 不添加不合法的时间标签(卡掉最后一个与end_time相同的时间标签)
927                         lyric_string << L'<' << Time(start_time).toVttTimeTag() << L'>';
928                 }
929                 lyric_string << L"\r\n";
930             }
931         }
932     }
933 
934     return lyric_string.str();
935 }
936 
SaveLyric2(bool lyric_and_traslation_in_same_line)937 void CLyrics::SaveLyric2(bool lyric_and_traslation_in_same_line)
938 {
939     if (m_lyrics.size() == 0) return;   // 没有歌词时直接返回
940 
941     // 保存歌词到文件,将偏移量存入每个时间标签
942     bool char_connot_convert;
943     string lyric_str = CCommon::UnicodeToStr(GetLyricsString2(lyric_and_traslation_in_same_line), m_code_type, &char_connot_convert);
944     ASSERT(!char_connot_convert);
945     ofstream out_put{ m_file, std::ios::binary };
946     out_put << lyric_str;
947     out_put.close();
948     m_modified = false;
949 }
950 
CombineSameTimeLyric(int error)951 void CLyrics::CombineSameTimeLyric(int error)
952 {
953     std::stable_sort(m_lyrics.begin(), m_lyrics.end());
954     for (int i{}; i < static_cast<int>(m_lyrics.size() - 1); i++)
955     {
956         if (m_lyrics[i + 1].time_start_raw - m_lyrics[i].time_start_raw <= error)   // 找到相同时间标签的歌词
957         {
958             m_lyrics[i].translate = m_lyrics[i + 1].text;
959             m_lyrics.erase(m_lyrics.begin() + i + 1);   // 删除后面一句歌词
960             m_text_and_translatein_in_same_line = false;
961         }
962     }
963 }
964 
DeleteRedundantLyric()965 void CLyrics::DeleteRedundantLyric()
966 {
967     for (size_t i{}; i < m_lyrics.size(); i++)
968     {
969         if (m_lyrics[i].time_start >= 6000000)                      // 找到一句歌词的时间标签大于100分钟
970         {
971             m_lyrics.erase(m_lyrics.begin() + i, m_lyrics.end());   // 删除该句歌词及其后面的所有歌词
972             break;
973         }
974     }
975 }
976 
SwapTextAndTranslation()977 void CLyrics::SwapTextAndTranslation()
978 {
979     for (auto& lyric : m_lyrics)
980     {
981         std::swap(lyric.text, lyric.translate);
982     }
983 }
984 
TimeTagForward()985 void CLyrics::TimeTagForward()
986 {
987     // 用后一句歌词的时间标签覆盖前面的
988     if (m_lyrics.size() > 1)
989     {
990         for (size_t i{}; i < m_lyrics.size() - 1; i++)
991         {
992             m_lyrics[i].time_start = m_lyrics[i + 1].time_start;
993         }
994     }
995 }
996 
TimeTagDelay()997 void CLyrics::TimeTagDelay()
998 {
999     // 用前一句歌词的时间标签覆盖后面的
1000     if (m_lyrics.size() > 1)
1001     {
1002         for (size_t i{ m_lyrics.size() - 1 }; i > 0; i--)
1003         {
1004             m_lyrics[i].time_start = m_lyrics[i - 1].time_start;
1005         }
1006     }
1007 }
1008 
// Splits a lyric line whose translation is enclosed in a pair of brackets.
// The LAST occurrence of each bracket character is used. On success the text
// between the brackets is stored in lyric_translate, everything outside them
// (before the left bracket plus after the right bracket) in lyric_text, and
// true is returned. When either bracket is missing or the left bracket is not
// located before the right one, false is returned and both outputs are left
// untouched.
static bool ParseLyricTextWithBracket(const std::wstring& lyric_text_ori, std::wstring& lyric_text, std::wstring& lyric_translate, wchar_t bracket_left, wchar_t bracket_right)
{
    const size_t open_pos = lyric_text_ori.rfind(bracket_left);
    if (open_pos == std::wstring::npos)
        return false;

    const size_t close_pos = lyric_text_ori.rfind(bracket_right);
    if (close_pos == std::wstring::npos)
        return false;

    // The left bracket must come strictly before the right one. This also
    // guarantees the left bracket is not the final character of the string.
    if (open_pos >= close_pos)
        return false;

    const size_t inner_begin = open_pos + 1;
    lyric_translate = lyric_text_ori.substr(inner_begin, close_pos - inner_begin);

    std::wstring outside = lyric_text_ori.substr(0, open_pos);
    outside += lyric_text_ori.substr(close_pos + 1);
    lyric_text = outside;
    return true;
}
1020 
ExtractTranslationFromBrackets()1021 void CLyrics::ExtractTranslationFromBrackets()
1022 {
1023     // 若对已有翻译的歌词使用则放弃原翻译
1024     m_translate = false;
1025     for (Lyric lyric : m_lyrics)
1026     {
1027         wstring temp = lyric.text;
1028         // 按带括号的翻译格式解析
1029         if (   ParseLyricTextWithBracket(temp, lyric.text, lyric.translate, L'【', L'】')//【】
1030             || ParseLyricTextWithBracket(temp, lyric.text, lyric.translate, L'〖', L'〗')//〖〗
1031             || ParseLyricTextWithBracket(temp, lyric.text, lyric.translate, L'「', L'」')//「」
1032             || ParseLyricTextWithBracket(temp, lyric.text, lyric.translate, L'『', L'』')//『』
1033             )
1034         {
1035             m_translate = true;
1036         }
1037         else
1038         {
1039             lyric.text = temp;
1040             lyric.translate.clear();
1041         }
1042     }
1043 }
1044 
AdjustLyric(int offset)1045 void CLyrics::AdjustLyric(int offset)
1046 {
1047     if (m_lyrics.size() == 0) return;  // 没有歌词时直接返回
1048     m_offset += offset;
1049     m_modified = true;
1050     NormalizeLyric();
1051 }
1052 
ChineseConvertion(bool simplified)1053 void CLyrics::ChineseConvertion(bool simplified)
1054 {
1055     for (auto& lyric : m_lyrics)
1056     {
1057         if (m_translate)               // 如果当前歌词有翻译,则只对全部翻译文本转换
1058         {
1059             if (simplified)
1060                 lyric.translate = CCommon::TranslateToSimplifiedChinese(lyric.translate);
1061             else
1062                 lyric.translate = CCommon::TranslateToTranditionalChinese(lyric.translate);
1063         }
1064         else
1065         {
1066             if (simplified)
1067                 lyric.text = CCommon::TranslateToSimplifiedChinese(lyric.text);
1068             else
1069                 lyric.text = CCommon::TranslateToTranditionalChinese(lyric.text);
1070         }
1071     }
1072     m_modified = true;
1073 }
1074