using System.Globalization;
using Microsoft.EntityFrameworkCore;
using Sakayaki.Models;
namespace Sakayaki.Services;
/// <summary>
/// Scans a Fanbox download tree on disk and inserts folders that are not yet
/// present in the database.
/// </summary>
public sealed class SyncService(AppDbContext dbContext)
{
    /// <summary>Date format expected at the very start of a syncable folder name.</summary>
    private const string FolderDateFormat = "yyyy-MM-dd";

    /// <summary>
    /// Length of the date prefix. The character at this index must be '-' and the
    /// title starts right after it.
    /// </summary>
    private const int FolderDateLength = 10;

    /// <summary>
    /// Characters that separate words inside a title. Both the ASCII and the
    /// fullwidth forms of brackets, slash and comma are listed; non-ASCII
    /// characters are written as \u escapes so they survive re-encoding of the file.
    /// </summary>
    private static readonly char[] TitleSeparators =
    {
        ' ',
        '\u3010', '\u3011', // lenticular brackets
        '(', ')',
        '\uFF08', '\uFF09', // fullwidth parentheses
        '/',
        '\uFF0F',           // fullwidth solidus
        ',',
        '\uFF0C',           // fullwidth comma
        '_',
        '-',
    };

    private readonly AppDbContext _dbContext = dbContext;

    /// <summary>
    /// Scans the Fanbox directories of multiple authors and adds new folders to the database.
    /// </summary>
    /// <param name="root">
    /// Root directory. Each direct sub-directory is treated as an author, and each
    /// sub-directory of an author named <c>yyyy-MM-dd-title</c> is treated as a folder to sync.
    /// </param>
    /// <param name="cancellationToken">Token checked once per author and once per folder.</param>
    /// <returns>
    /// <c>0</c> when nothing new was found; otherwise the value returned by
    /// <c>SaveChangesAsync</c> for the batch insert.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="root"/> is null, empty or whitespace.</exception>
    public async Task<int> SyncFanboxFoldersAsync(
        string root,
        CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(root))
            throw new ArgumentException("Root path is required.", nameof(root));

        // Keywords already stored in the database are loaded once and reused for
        // every title scanned in this run.
        var existingKeywords = await LoadExistingKeywordsAsync(cancellationToken);
        var pending = new List<FanboxFolder>();

        foreach (var authorDir in Directory.GetDirectories(root))
        {
            cancellationToken.ThrowIfCancellationRequested();

            var author = Path.GetFileName(authorDir);
            if (string.IsNullOrWhiteSpace(author))
                continue;

            foreach (var dir in Directory.GetDirectories(authorDir))
            {
                cancellationToken.ThrowIfCancellationRequested();

                var folderName = Path.GetFileName(dir);

                // Only folders named "yyyy-MM-dd-<title>" take part in the sync.
                if (!TryParseFolderName(folderName, out var date, out var title))
                    continue;

                // Check the database first so that folders which are already synced
                // do not pay for keyword extraction and a file listing.
                var exists = await _dbContext.FanboxFolders.AsNoTracking().AnyAsync(
                    x => x.Author == author && x.Date == date && x.Title == title,
                    cancellationToken);
                if (exists)
                    continue;

                // Collected here and written in a single batch after the scan.
                pending.Add(new FanboxFolder
                {
                    FolderName = folderName,
                    Author = author,
                    Date = date,
                    Title = title,
                    Keywords = BuildKeywords(title, existingKeywords),
                    FileCount = Directory.GetFiles(dir).Length
                });
            }
        }

        if (pending.Count == 0)
            return 0;

        _dbContext.FanboxFolders.AddRange(pending);
        return await _dbContext.SaveChangesAsync(cancellationToken);
    }

    /// <summary>
    /// Splits a folder name of the form <c>yyyy-MM-dd-title</c> into its date and title.
    /// </summary>
    /// <param name="folderName">Directory name without its parent path.</param>
    /// <param name="date">Parsed date prefix when the method returns <c>true</c>.</param>
    /// <param name="title">
    /// Everything after the '-' that follows the date when the method returns
    /// <c>true</c>; may be empty.
    /// </param>
    /// <returns><c>true</c> when the name matches the convention; otherwise <c>false</c>.</returns>
    private static bool TryParseFolderName(string folderName, out DateTime date, out string title)
    {
        date = default;
        title = string.Empty;

        // The name must at least hold the date plus the '-' that follows it.
        if (folderName.Length <= FolderDateLength || folderName[FolderDateLength] != '-')
            return false;

        if (!DateTime.TryParseExact(
                folderName[..FolderDateLength],
                FolderDateFormat,
                CultureInfo.InvariantCulture,
                DateTimeStyles.None,
                out date))
            return false;

        title = folderName[(FolderDateLength + 1)..];
        return true;
    }

    /// <summary>
    /// Extracts keywords from a title and adds every already-known keyword that
    /// occurs in the title.
    /// </summary>
    /// <param name="title">Title part of the folder name.</param>
    /// <param name="existingKeywords">Keywords previously stored in the database.</param>
    /// <returns>
    /// The distinct keywords joined with ',' or <c>null</c> when none were found.
    /// </returns>
    private static string? BuildKeywords(string title, IReadOnlyCollection<string> existingKeywords)
    {
        // A set keeps every keyword unique.
        var hit = new HashSet<string>(StringComparer.Ordinal);

        // Split on every separator in one pass; words shorter than two
        // characters are dropped as noise.
        foreach (var word in title.Split(TitleSeparators, StringSplitOptions.RemoveEmptyEntries))
        {
            if (word.Length >= 2)
                hit.Add(word);
        }

        // Known keywords are matched as substrings of the raw title, so they
        // are found even when they span one of the separators above.
        foreach (var keyword in existingKeywords)
        {
            if (title.Contains(keyword, StringComparison.Ordinal))
                hit.Add(keyword);
        }

        return hit.Count > 0 ? string.Join(",", hit) : null;
    }

    /// <summary>
    /// Loads all stored keyword strings from the database and flattens them
    /// into a set of distinct keywords.
    /// </summary>
    /// <param name="cancellationToken">Token passed to the database query.</param>
    /// <returns>The distinct, trimmed, non-empty keywords found in the stored keyword strings.</returns>
    private async Task<IReadOnlyCollection<string>> LoadExistingKeywordsAsync(CancellationToken cancellationToken)
    {
        var keywordLists = await _dbContext.FanboxFolders.AsNoTracking()
            .Select(x => x.Keywords)
            .Where(x => x != null && x != string.Empty)
            .Distinct()
            .ToListAsync(cancellationToken);

        // Each stored value is a comma-separated list; split it and merge the
        // parts into a single set.
        var keywords = new HashSet<string>(StringComparer.Ordinal);
        foreach (var list in keywordLists)
        {
            if (string.IsNullOrWhiteSpace(list))
                continue;

            foreach (var keyword in list.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries))
            {
                keywords.Add(keyword);
            }
        }

        return keywords;
    }
}