// File: Sakayaki/Services/SyncService.cs
// Repository-viewer header (not part of the program): last change by @fabre on 23 Jan, 5 KB, commit message "他说他超进化了".
using System.Globalization;
using Microsoft.EntityFrameworkCore;
using Sakayaki.Models;

namespace Sakayaki.Services;

/// <summary>
/// Scans a Fanbox download tree on disk and inserts a <see cref="FanboxFolder"/> row
/// for every post folder that is not yet stored in the database.
/// </summary>
/// <param name="dbContext">Context used to query existing rows and to insert new ones.</param>
public sealed class SyncService(AppDbContext dbContext)
{
    /// <summary>Date format that must prefix every post folder name.</summary>
    private const string DateFormat = "yyyy-MM-dd";

    /// <summary>Number of characters occupied by <see cref="DateFormat"/> in a folder name.</summary>
    private const int DatePrefixLength = 10;

    /// <summary>
    /// Characters that split a title into candidate keywords. Non-ASCII entries are written
    /// as escapes so the half-width and full-width variants cannot be confused or collapsed
    /// into each other by an editor or a text normalizer.
    /// </summary>
    private static readonly char[] TitleSeparators =
    {
        ' ',
        '\u3010', '\u3011', // left / right black lenticular bracket
        '(', ')',
        '\uFF08', '\uFF09', // full-width parentheses
        '/',
        '\uFF0F',           // full-width solidus
        ',',
        '\uFF0C',           // full-width comma
        '_',
        '-',
    };

    private readonly AppDbContext _dbContext =
        dbContext ?? throw new ArgumentNullException(nameof(dbContext));

    /// <summary>
    /// Walks <paramref name="root"/> two levels deep (author directory, then post directory)
    /// and inserts every post folder named <c>yyyy-MM-dd-&lt;title&gt;</c> whose
    /// author + date + title combination is not already present in the database.
    /// </summary>
    /// <param name="root">Directory whose immediate sub-directories are treated as authors.</param>
    /// <param name="cancellationToken">Checked before each directory and passed to every database call.</param>
    /// <returns>
    /// The value returned by <c>SaveChangesAsync</c> for the batch insert,
    /// or 0 when no new folder was found.
    /// </returns>
    /// <exception cref="ArgumentException"><paramref name="root"/> is null, empty or whitespace.</exception>
    public async Task<int> SyncFanboxFoldersAsync(
        string root,
        CancellationToken cancellationToken = default)
    {
        if (string.IsNullOrWhiteSpace(root))
            throw new ArgumentException("Root path is required.", nameof(root));

        // Keywords already stored in the database; reused to tag titles that contain them.
        var existingKeywords = await LoadExistingKeywordsAsync(cancellationToken);
        var pending = new List<FanboxFolder>();

        foreach (var authorDir in Directory.GetDirectories(root))
        {
            cancellationToken.ThrowIfCancellationRequested();

            var author = Path.GetFileName(authorDir);
            if (string.IsNullOrWhiteSpace(author))
                continue;

            foreach (var dir in Directory.GetDirectories(authorDir))
            {
                cancellationToken.ThrowIfCancellationRequested();

                var folderName = Path.GetFileName(dir);
                if (!TryParseFolderName(folderName, out var date, out var title))
                    continue;

                // Check the database first so that already-synced folders cost no
                // keyword extraction and no extra file-system enumeration.
                var exists = await _dbContext.FanboxFolders.AsNoTracking().AnyAsync(
                    x => x.Author == author && x.Date == date && x.Title == title,
                    cancellationToken);
                if (exists)
                    continue;

                // Collect new rows and write them in a single SaveChanges call below.
                pending.Add(new FanboxFolder
                {
                    FolderName = folderName,
                    Author = author,
                    Date = date,
                    Title = title,
                    Keywords = BuildKeywords(title, existingKeywords),
                    FileCount = Directory.GetFiles(dir).Length
                });
            }
        }

        if (pending.Count == 0)
            return 0;

        _dbContext.FanboxFolders.AddRange(pending);
        return await _dbContext.SaveChangesAsync(cancellationToken);
    }

    /// <summary>
    /// Splits a folder name of the form <c>yyyy-MM-dd-&lt;title&gt;</c> into its date and title.
    /// </summary>
    /// <param name="folderName">Directory name without its parent path.</param>
    /// <param name="date">Parsed date prefix when the method returns <c>true</c>.</param>
    /// <param name="title">Everything after the separating hyphen; may be empty.</param>
    /// <returns><c>false</c> when the name does not start with a valid date followed by '-'.</returns>
    private static bool TryParseFolderName(string folderName, out DateTime date, out string title)
    {
        date = default;
        title = string.Empty;

        // The name must be long enough to hold the date plus the '-' right after it.
        if (folderName.Length <= DatePrefixLength || folderName[DatePrefixLength] != '-')
            return false;

        if (!DateTime.TryParseExact(
                folderName[..DatePrefixLength],
                DateFormat,
                CultureInfo.InvariantCulture,
                DateTimeStyles.None,
                out date))
            return false;

        title = folderName[(DatePrefixLength + 1)..];
        return true;
    }

    /// <summary>
    /// Derives the keyword list for a title: every separator-delimited word of at least two
    /// characters, plus every already-known keyword that occurs anywhere in the title.
    /// </summary>
    /// <param name="title">Post title taken from the folder name.</param>
    /// <param name="existingKeywords">Keywords previously stored in the database.</param>
    /// <returns>The distinct keywords joined with ',', or <c>null</c> when there are none.</returns>
    private static string? BuildKeywords(string title, IReadOnlyCollection<string> existingKeywords)
    {
        // Ordinal set keeps each keyword once.
        var hit = new HashSet<string>(StringComparer.Ordinal);

        // Single-character fragments are dropped as noise.
        foreach (var word in title.Split(TitleSeparators, StringSplitOptions.RemoveEmptyEntries))
        {
            if (word.Length >= 2)
                hit.Add(word);
        }

        // Substring match against the raw title, so known keywords are found even when
        // they are not delimited by one of the separators.
        foreach (var keyword in existingKeywords)
        {
            if (title.Contains(keyword, StringComparison.Ordinal))
                hit.Add(keyword);
        }

        return hit.Count > 0 ? string.Join(",", hit) : null;
    }

    /// <summary>
    /// Loads every non-empty <c>Keywords</c> value from the database and flattens the
    /// comma-separated lists into one distinct set.
    /// </summary>
    /// <param name="cancellationToken">Passed to the database query.</param>
    /// <returns>Distinct, trimmed, non-empty keywords (ordinal comparison).</returns>
    private async Task<IReadOnlyCollection<string>> LoadExistingKeywordsAsync(CancellationToken cancellationToken)
    {
        var keywordLists = await _dbContext.FanboxFolders.AsNoTracking()
            .Select(x => x.Keywords)
            .Where(x => x != null && x != string.Empty)
            .Distinct()
            .ToListAsync(cancellationToken);

        var keywords = new HashSet<string>(StringComparer.Ordinal);
        foreach (var list in keywordLists)
        {
            // The query only filters null/empty; whitespace-only values are skipped here.
            if (string.IsNullOrWhiteSpace(list))
                continue;

            keywords.UnionWith(
                list.Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries));
        }

        return keywords;
    }
}