1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786 |
- using Aliyun.OSS;
- using Elasticsearch.Net;
- using JiaZhiQuan.Common.AliOSS;
- using JiaZhiQuan.Common.Config;
- using JiaZhiQuan.Common.ElasticSearch;
- using JiaZhiQuan.Common.ElasticSearch.Models;
- using JiaZhiQuan.Common.SnowFlake;
- using Newtonsoft.Json;
- using Newtonsoft.Json.Linq;
- using Org.BouncyCastle.Utilities;
- using Polly;
- using Senparc.Weixin.MP.Containers;
- using System;
- using System.Collections.Generic;
- using System.Data;
- using System.Diagnostics;
- using System.Dynamic;
- using System.Linq;
- using System.Net.Http;
- using System.Reflection;
- using System.Security.Cryptography;
- using System.Text;
- using System.Threading.Tasks;
- using Wicture.DbRESTFul;
- using Wicture.DbRESTFul.Cache;
- using Wicture.DbRESTFul.Infrastructure.Repository;
- using Wicture.DbRESTFul.Rpc.Client;
- namespace JiaZhiQuan.Common
- {
- public class PostHotValue
- {
- public static int GetRoleScore(int role, bool superior)
- {
- int basic = 100;
- if (role > 0)
- {
- basic += 70;
- }
- if (superior)
- {
- basic += 50;
- }
- if (role == 2)
- {
- basic += 50;
- }
- return basic;
- }
- /// <summary>
- /// 客观事实数据(阅读量相关)
- /// </summary>
- public float fact { get; set; } = 0;
- /// <summary>
- /// 平台主观策略分(内容评定分)
- /// </summary>
- public float subjectiveScore { get; set; }
- /// <summary>
- /// 账号风险系数
- /// </summary>
- public float cntHotCoefficient { get; set; }
- public float hot { get; set; }
- }
- public class PostScore
- {
- public long id { get; set; }
- public long userId { get; set; }
- public double hot { get; set; }
- public PostScore Clone()
- {
- return new PostScore
- {
- id = id,
- userId = userId,
- hot = hot
- };
- }
- }
- public static partial class RepositoryExtension
- {
- /// <summary>
- /// 通过给定的PostScoreList基础上,来计算此用户个人的推荐列表,最多返回maxPostCount个结果。如果用户未关注任何人,则无需计算
- /// </summary>
- /// <param name="list">给定的内容分列表,在此基础上添加个性推荐分</param>
- /// <param name="userCntHotCoefficientDic">账号热度系数</param>
- private static async Task<List<PostScore>> GetPersonalRecommendList(DbRESTFulRepository repository, List<PostScore> list, Dictionary<long, float> userCntHotCoefficientDic, long userId, IRpcClient rpcClient, int maxPostCount = 1000)
- {
- if (list == null || list.Count == 0) return new List<PostScore>();
- // 查出用户最新关注的500用户
- var focusUserIds = userId > 0 ? (await repository.QueryAsync<long>($"select userId from n_user_fan where fanId={userId} order by id desc")).ToList() : new List<long>();
- // 因为现在只有关注才影响热度值,所以没有关注,则无需计算
- // if (focusUserIds.Count == 0) return new List<PostScore>();
- var now = DateTime.Now;
- if (userId > 0)
- {
- var changed = false;
- list.ForEach(e =>
- {
- if (focusUserIds.Contains(e.userId))
- {
- e.hot += 200 / Math.Ceiling((now - IdWorker.DecodeId(e.id).Time).TotalDays) * userCntHotCoefficientDic[e.userId];
- if (!changed) changed = true;
- }
- });
- var list1 = changed ? list.OrderByDescending(x => x.hot).ToList() : list;
- // 过滤已看过的,如果未看的内容不足最大推荐数量,则将已看内容添加至未看列表末尾
- var filterRst = (await rpcClient.InvokeRpcAsync(repository.Context, "UserVisitService", "FilterNotExist", string.Join(',', list1.Select(e => $"{userId}_{e.id}")))).Data;
- var filteredList = (JsonConvert.DeserializeObject<string>(filterRst) ?? string.Empty).Split(',', StringSplitOptions.RemoveEmptyEntries).Select(e => long.Parse(e.Split('_')[1])).ToHashSet();
- if (filteredList.Count > 0)
- {
- var validList = new List<PostScore>();
- var readList = new List<PostScore>();
- foreach (var e in list1)
- {
- if (filteredList.Contains(e.id))
- {
- validList.Add(e);
- if (validList.Count >= maxPostCount) break;
- }
- else
- {
- readList.Add(e);
- }
- }
- if (validList.Count < maxPostCount && readList.Count > 0)
- {
- foreach (var e in readList)
- {
- validList.Add(e);
- if (validList.Count >= maxPostCount) break;
- }
- }
- return validList;
- }
- else
- {
- return list1.Count > maxPostCount ? list1.Take(maxPostCount).ToList() : list1;
- }
- }
- else
- {
- return list.Count > maxPostCount ? list.Take(maxPostCount).ToList() : list;
- }
- }
- /// <summary>
- /// 对postIds列表重新排列,将连续相同用户间隔开
- /// </summary>
- /// <param name="postIds">文章Id列表</param>
- /// <param name="userIds">用户Id列表,个数与文章Id列表一致,如果传空,则会通过PostIds在数据库中查询对应的用户Id</param>
- /// <param name="firstFixed">是否锁定第一个</param>
- /// <returns>返回(文章编号列表, 是否存在顺序重排)</returns>
- public static async Task<(List<long>, bool)> RearrangePostIds(this DbRESTFulRepository repository, List<long> postIds, List<long> userIds, bool firstFixed = false, IDbConnection conn = null)
- {
- var rearranged = false;
- // 找出文章id对应的用户编号, 500为一批进行查询
- var size = 500;
- userIds ??= new List<long>(postIds.Count);
- if (userIds.Count == 0)
- {
- for (var i = 0; i < postIds.Count; i += size)
- {
- var leftCount = postIds.Count - i;
- var subList = postIds.Skip(i).Take(leftCount >= size ? size : leftCount).ToList();
- // 查出对应的用户id
- var map = (await repository.QueryAsync<dynamic>("select id, userId from n_post where id in @ids",
- new { ids = subList }, conn, null, false)).ToDictionary(e => (long)e.id);
- postIds.ForEach(postId =>
- {
- userIds.Add(map.ContainsKey(postId) ? (long)map[postId].userId : 0);
- });
- }
- }
- // 如果文章id列表与用户id列表无法一一对应,则直接返回文章id列表
- if (userIds.Count != postIds.Count) return (postIds, rearranged);
-
- var count = postIds.Count;
- var hasRepetitiveItems = false;
- for (var i = 1; i < count; i++)
- {
- if (userIds[i - 1] == userIds[i])
- {
- var exchanged = false;
- // 如果当前文章用户与上一个文章用户相同,则向后查找一个与当前用户不同的记录
- for (var j = i + 1; j < count; j++)
- {
- if (userIds[j] != userIds[i])
- {
- // 交换
- postIds[i] ^= postIds[j];
- postIds[j] = postIds[i] ^ postIds[j];
- postIds[i] ^= postIds[j];
- userIds[i] ^= userIds[j];
- userIds[j] = userIds[i] ^ userIds[j];
- userIds[i] ^= userIds[j];
- exchanged = true;
- rearranged = true;
- break;
- }
- }
- // 如果未找到可以替换的,则表示后面的全都是相同用户的内容,无需进行后续查找与替换
- if (!exchanged)
- {
- hasRepetitiveItems = true;
- break;
- }
- }
- }
- // 如果存在未能替换的,则反向执行替换一次
- if (hasRepetitiveItems)
- {
- var reverseEndIdx = firstFixed ? 1 : 0;
- for (var i = count - 2; i > reverseEndIdx; i--)
- {
- if (userIds[i + 1] == userIds[i])
- {
- var exchanged = false;
- // 如果当前文章用户与上一个文章用户相同,则向后查找一个与当前用户不同的记录
- for (var j = i - 1; j >= reverseEndIdx; j--)
- {
- if (userIds[j] != userIds[i])
- {
- // 交换
- postIds[i] ^= postIds[j];
- postIds[j] = postIds[i] ^ postIds[j];
- postIds[i] ^= postIds[j];
- userIds[i] ^= userIds[j];
- userIds[j] = userIds[i] ^ userIds[j];
- userIds[i] ^= userIds[j];
- exchanged = true;
- rearranged = true;
- break;
- }
- }
- // 如果未找到可以替换的,则表示后面的全都是相同用户的内容,无需进行后续查找与替换
- if (!exchanged)
- {
- break;
- }
- }
- }
- }
- return (postIds, rearranged);
- }
-
- /// <summary>
- /// 将推荐的内容保存到Redis中
- /// </summary>
- /// <param name="categoryType">如果没有,则传null</param>
- /// <param name="contentType">null表示所有,1为带图片的,2为带视频的</param>
- /// <param name="hasTopic">如果没有,则传null</param>
- private static async Task UpdatePersonalRecommendListToRedis(DbRESTFulRepository repository, RedisCacheProvider cacheProvider, long[] ids, long userId, int? categoryType, int? contentType, bool? hasTopic, int dayOfMonth)
- {
- // 重排文章列表(将连续用户分开)
- ids = (await RearrangePostIds(repository, ids.ToList(), null)).Item1.ToArray();
-
- var key = CacheKeys.PostRecommendList(dayOfMonth, userId, categoryType, contentType, hasTopic);
- await cacheProvider.CSRedisClient.DelAsync(key);
- if (ids.Length == 0) return;
- await cacheProvider.CSRedisClient.RPushAsync(key, ids);
- await cacheProvider.CSRedisClient.ExpireAsync(key, CacheKeys.PostRecommendListCacheSecs);
- }
- /// <summary>
- /// 获取满足条件的文章列表(热度>0的)
- /// </summary>
- /// <param name="categoryType">如果没有,则传null</param>
- /// <param name="contentType">null表示所有,1为带图片的,2为带视频的</param>
- /// <param name="hasTopic">如果没有,则传null</param>
- /// <param name="userCntHotCoefficientDic">如果传入的字典不为空,则在查询结果后,会将结果中用户对应的账号热度系数查出来,并存入字典</param>
- /// <param name="maxPostCount">最多获取多少条</param>
- private static async Task<List<PostScore>> GetHotPostList(DbRESTFulRepository repository, ConfigFromDb config, int categoryType, int? contentType, bool? hasTopic = null, Dictionary<long, float> userCntHotCoefficientDic = null, int maxPostCount = 5000, IDbConnection conn = null)
- {
- var postList = new List<PostScore>();
- var minId = IdWorker.GetStartIdByDate(
- string.IsNullOrEmpty(config.PostStartRecommendDate)
- ? DateTime.Now.AddDays(-config.PostMaxRecommendDays)
- : DateTime.Parse(config.PostStartRecommendDate)
- );
- var mustConditions = new List<dynamic>
- {
- new { term = new { state = 1 } },
- new { range = new { hot = new { gt = 0 } } },
- new { term = new { categoryType } },
- new { range = new { id = new { gt = minId } } },
- };
- // 如果是带图片的
- if (contentType == 1)
- {
- mustConditions.Add(new { term = new { type = 0 } });
- mustConditions.Add(new { range = new { coverWidth = new { gt = 0 } } });
- }
- else if (contentType == 2)
- {
- mustConditions.Add(new { term = new { type = 1 } });
- }
- if (hasTopic != null)
- {
- mustConditions.Add(new { script = new { script = "doc['validTopicIds'].length>0" } });
- }
- var query = new
- {
- _source = new string[] { "id", "userId", "hot" },
- size = maxPostCount,
- query = new
- {
- @bool = new
- {
- filter = new
- {
- @bool = new
- {
- must = mustConditions
- }
- }
- }
- },
- sort = new List<dynamic>
- {
- new
- {
- hot = new { order = "desc" }
- },
- new
- {
- id = new { order = "desc" }
- }
- }
- };
- var resp = await ESHelper.Client.SearchAsync<StringResponse>(ESConstants.ESIndexName.Post, PostData.Serializable(query));
- if (resp.Success)
- {
- var obj = JsonConvert.DeserializeObject<JToken>(resp.Body);
- var list = obj.Value<JObject>("hits")?.Value<JArray>("hits")?.Select(e =>
- {
- var source = e.Value<JToken>("_source");
- return new PostScore
- {
- id = source.Value<long>("id"),
- userId = source.Value<long>("userId"),
- hot = source.Value<double>("hot"),
- };
- })?.ToList();
- if (list != null)
- {
- postList = list;
- }
- }
- else
- {
- LoggerManager.Logger.Error("GetHotPostList ES查询出错 " + resp?.Body);
- }
- if (userCntHotCoefficientDic != null)
- {
- var ids = new HashSet<long>();
- postList.ForEach(e =>
- {
- if (userCntHotCoefficientDic.ContainsKey(e.userId) || ids.Contains(e.userId)) return;
- ids.Add(e.userId);
- });
- if (ids.Count > 0)
- {
- var batchSize = 500;
- for (var i = 0; i < ids.Count; i += batchSize)
- {
- var count = i + batchSize > ids.Count ? ids.Count - i : batchSize;
- var subList = i == 0 && ids.Count <= batchSize ? ids : ids.Skip(i).Take(count);
- var userCntHotCoefficients = await repository.QueryAsync<dynamic>("select id, cntHotCoefficient from n_user where id in @ids", new { ids = subList }, conn);
- userCntHotCoefficients.ForEach(e =>
- {
- userCntHotCoefficientDic[(long)e.id] = (float)e.cntHotCoefficient;
- });
- }
- }
- }
- return postList;
- }
- /// <summary>
- /// 更新推荐缓存列表
- /// </summary>
- /// <param name="updatePublicOnly">是否仅更新未登录用户的热门列表</param>
- public static async Task UpdateUserRecommendPostCache(this DbRESTFulRepository repository, bool calcPersonal, RedisCacheProvider cacheProvider, IRpcClient rpcClient, ConfigFromDb config)
- {
- // 每类型中取出最大值
- var maxPostCount = 10000;
- Dictionary<long, float> userCntHotCoefficientDic = null;
- if (calcPersonal)
- {
- userCntHotCoefficientDic = new Dictionary<long, float>();
- }
- // 记录推荐缓存同步记录
- var syncRecordId = await repository.QuerySingleOrDefaultAsync<int>("insert into n_post_recommend_records(startAt) values(now()); select last_insert_id() as id;", null, null, null, false);
- var syncNote = new StringBuilder();
- using var conn = repository.ConnectionManager.GetConnection();
- conn.Open();
- var days = 10; // 10天内活跃的,且有关注记录的用户
- var now = DateTime.Now;
- //var standpoints = await GetHotPostList(repository, 2, null, null, userCntHotCoefficientDic, maxPostCount, conn);
- // -9999表示所有一级分类
- var diaryLst = new Dictionary<int, List<PostScore>>();
- diaryLst[-9999] = await GetHotPostList(repository, config, 1, null, null, userCntHotCoefficientDic, maxPostCount, conn);
- // diaryLst[-9998] = await GetHotPostList(repository, config, 1, null, true, userCntHotCoefficientDic, maxPostCount, conn);
- // 带图片的图文笔记
- diaryLst[1] = await GetHotPostList(repository, config, 1, 1, null, userCntHotCoefficientDic, maxPostCount, conn);
- // 视频笔记
- diaryLst[2] = await GetHotPostList(repository, config, 1, 2, null, userCntHotCoefficientDic, maxPostCount, conn);
- var dayOfMonth = now.Day;
-
- foreach (var di in diaryLst)
- {
- var nrl = await GetPersonalRecommendList(repository, diaryLst[di.Key], userCntHotCoefficientDic, 0, rpcClient, 1000);
- await UpdatePersonalRecommendListToRedis(repository, cacheProvider, nrl.Select(i => i.id).ToArray(), 0, 1,
- di.Key < 0 ? null : (int?)di.Key, null, dayOfMonth);
- }
- if (calcPersonal)
- {
- var startDate = now.AddDays(-days).ToString("yyyy-MM-dd");
- // 查出10天内活跃用户
- var userIds = (await repository.QueryAsync<long>($"select distinct userId from n_user_online where `date`>='{startDate}'", null, conn)).ToList();
- var userFetchSize = 500;
- var emptyPostIds = Array.Empty<long>();
- for (var i = 0; i < userIds.Count; i += userFetchSize)
- {
- var count = i + userFetchSize > userIds.Count ? userIds.Count - i : userFetchSize;
- var subList = i == 0 && userIds.Count <= userFetchSize ? userIds : userIds.Skip(i).Take(count).ToList();
- // 查询出有关注的用户
- var validSubList = (await repository.QueryAsync<long>("select userId from n_user_statistic where focusUser>0 and userId in @subList", new { subList }, conn)).ToHashSet();
- foreach (var userId in subList)
- {
- if (CommonUtils.IsMockUser(userId)) continue;
- // 如果是无关注的用户,则推荐设置为[]
- if (!validSubList.Contains(userId))
- {
- foreach (var di in diaryLst)
- {
- await UpdatePersonalRecommendListToRedis(repository, cacheProvider, emptyPostIds, userId, 1,
- di.Key < 0 ? null : (int?)di.Key, null, dayOfMonth);
- }
- continue;
- }
- foreach (var di in diaryLst)
- {
- // 热度会被计算,所以得Clone出副本
- var rl = await GetPersonalRecommendList(repository,
- diaryLst[di.Key].Select(e => e.Clone()).ToList(), userCntHotCoefficientDic, userId,
- rpcClient, 1000);
- await UpdatePersonalRecommendListToRedis(repository, cacheProvider, rl.Select(i => i.id).ToArray(),
- userId, 1, di.Key < 0 ? null : (int?)di.Key, null,
- dayOfMonth);
- }
- }
- }
- syncNote.Append($"含用户个性化推荐【{userIds.Count}】");
- }
- if (syncRecordId > 0)
- {
- var endAt = DateTime.Now;
- await repository.QueryAsync<dynamic>("update n_post_recommend_records set endAt=@endAt, totalSecs=@totalSecs, note=@note where id=@id", new
- {
- id = syncRecordId,
- endAt,
- totalSecs = (float)(endAt - now).TotalSeconds,
- note = syncNote.ToString()
- }, null, null, false);
- }
- }
- /// <summary>
- /// 生成新文章推荐缓存,如果对应的缓存已存在,则不生成
- /// </summary>
- public static async Task GenerateNewPostRecommend(this DbRESTFulRepository repository, DateTime time,
- int categoryType, int? contentType, bool? hasTopic, RedisCacheProvider cacheProvider, ConfigFromDb config)
- {
- var minute = (time.Minute / 5) * 5;
- DateTime startTime;
- // 查询上次推荐缓存同步时间,如果有,则取同步时间,如果此时间与当前时间相差超过一天,则取24小时前的时间
- var syncTime =
- await repository.QuerySingleOrDefaultAsync<DateTime?>(
- "select startAt from n_post_recommend_records order by id desc limit 1");
- if (syncTime != null)
- {
- if ((syncTime.Value - time).TotalDays > 1)
- {
- startTime = time.AddHours(-config.PostIgnoreHotRecentHours - 24);
- }
- else
- {
- startTime = syncTime.Value.AddHours(-config.PostIgnoreHotRecentHours);
- }
- }
- else
- {
- startTime = time.AddHours(-config.PostIgnoreHotRecentHours);
- }
- var startPostId = IdWorker.GetStartIdByDate(startTime);
- var key = CacheKeys.PostNewRecommendCache($"{time.ToString("MMddHH")}{minute.ToString("00")}",
- categoryType, contentType, hasTopic);
- if (await cacheProvider.KeyExist(key)) return;
- var mustConditions = new List<dynamic>
- {
- new { range = new { id = new { gt = startPostId } } },
- new { term = new { state = 1 } },
- new { range = new { subjectiveScore = new { gt = 0 } } },
- new { term = new { categoryType } },
- };
- if (contentType == 1)
- {
- mustConditions.Add(new { term = new { type = 0 } });
- mustConditions.Add(new { range = new { coverWidth = new { gt = 0 } } });
- }
- else if (contentType == 2)
- {
- mustConditions.Add(new { term = new { type = 1 } });
- }
- var postList = new List<(double, object)>();
- var query = new
- {
- _source = new string[] { "id", "subjectiveScore" },
- size = 10000,
- query = new
- {
- @bool = new
- {
- filter = new
- {
- @bool = new
- {
- must = mustConditions
- }
- }
- }
- }
- };
- var resp = await ESHelper.Client.SearchAsync<StringResponse>(ESConstants.ESIndexName.Post,
- PostData.Serializable(query));
- if (resp.Success)
- {
- var obj = JsonConvert.DeserializeObject<JToken>(resp.Body);
- int curScore = 0;
- obj.Value<JObject>("hits")?.Value<JArray>("hits")?.ForEach(e =>
- {
- var source = e.Value<JToken>("_source");
- curScore += (int)Math.Ceiling(source.Value<float>("subjectiveScore"));
- postList.Add((curScore, source.Value<long>("id")));
- });
- }
- else
- {
- LoggerManager.Logger.Error("GetHotPostList ES查询出错 " + resp?.Body);
- }
- if (postList.Count == 0)
- {
- postList.Add((0, 0));
- }
- await cacheProvider.CSRedisClient.ZAddAsync(key, postList.ToArray());
- // 缓存10分钟
- await cacheProvider.CSRedisClient.ExpireAsync(key, 60 * 10);
- }
- /// <summary>
- /// 批量更新文章的冷却值
- /// </summary>
- public static async Task RecalcPostCoolingFactorAll(this DbRESTFulRepository repository, long startPostId, long? endPostId, ConfigFromDb config)
- {
- var batchSize = 1000;
- using var conn = repository.ConnectionManager.GetConnection(false);
- conn.Open();
- while (true)
- {
- var extQuery = endPostId == null ? "" : $" and id<{endPostId}";
- var infos = (await repository.QueryAsync<dynamic>($"select id, userId, coolingFactor from n_post where id>{startPostId} {extQuery} and state=1 and categoryType=1 order by id asc limit {batchSize}", null, conn)).ToList();
- if (infos.Count == 0) break;
- var firstPostTime = IdWorker.DecodeId(infos[0].id).Time;
- var localMinId = IdWorker.GetStartIdByDate(firstPostTime.AddHours(config.PostCoolingPeriodHours)); // 满足条件可在本地计算冷却系数的最小文章编号
- var localStartIndex = infos.FindIndex(e => e.id > localMinId); // 此这里开始就可以在本地算冷却系数了
- var dic = new Dictionary<long, float>();
- if (localStartIndex > -1)
- {
- for (var j = localStartIndex; j < infos.Count; j++)
- {
- var item = infos[j];
- var id = (long)item.id;
- var minId = IdWorker.GetStartIdByDate(IdWorker.DecodeId(id).Time.AddHours(-config.PostCoolingPeriodHours));
- // 向前找X小时内同用户的帖子数量
- var count = 1;
- for (var i = j - 1; i >= 0; i--)
- {
- var item1 = infos[i];
- if (item1.userId == item.userId && item1.id > minId)
- {
- count++;
- }
- }
- float val;
- if (count <= config.PostIgnoreCoolingMaxCountPerPeriod)
- {
- val = 1;
- }
- else
- {
- val = (float)(1 / Math.Pow(count, 0.5));
- }
- if (item.coolingFactor != val)
- {
- dic[id] = val;
- }
- }
- }
- else
- {
- localStartIndex = infos.Count;
- }
- var cacheUserPostIds = new Dictionary<long, List<long>>();
- for (var j = 0; j < localStartIndex; j++)
- {
- var item = infos[j];
- var id = (long)item.id;
- var minId = IdWorker.GetStartIdByDate(IdWorker.DecodeId((long)item.id).Time.AddHours(-config.PostCoolingPeriodHours));
- var userId = (long)item.userId;
- List<long> cachedList;
- if (cacheUserPostIds.ContainsKey(userId))
- {
- cachedList = cacheUserPostIds[userId];
- }
- else
- {
- cachedList = (await repository.QueryAsync<long>($"select id from n_post where id>{minId} and userId={userId} and state=1 order by id asc")).ToList();
- cacheUserPostIds[userId] = cachedList;
- }
- var count = cachedList.Count(e => e > minId);
- float val;
- if (count <= config.PostIgnoreCoolingMaxCountPerPeriod)
- {
- val = 1;
- }
- else
- {
- val = (float)(1 / Math.Pow(count, 0.5));
- }
- if (item.coolingFactor != val)
- {
- dic[id] = val;
- }
- cachedList.Add(item.id);
- }
- if (dic.Count > 0)
- {
- // 更新冷却值
- var list = dic.Select(e => new
- {
- id = e.Key,
- coolingFactor = e.Value
- });
- await repository.QueryAsync<dynamic>("update n_post set coolingFactor=@coolingFactor, updateAt=now() where id=@id", list, conn, null, false);
- }
- if (infos.Count < batchSize) break;
- startPostId = infos.Last().id;
- }
- }
- /// <summary>
- /// 更新单一某个文章的冷却值,同时会更新其后面的N个
- /// </summary>
- /// <param name="authorId">不传则从数据库中查询</param>
- public static async Task RecalcPostCoolingFactor(this DbRESTFulRepository repository, long postId, long authorId, ConfigFromDb config)
- {
- // 计算冷却系数
- var postCoolingPeriodHours = config.PostCoolingPeriodHours; // 冷却小时数,即在多久内计算冷却系数
- var postIgnoreCoolingMaxCountPerPeriod = config.PostIgnoreCoolingMaxCountPerPeriod; // 在冷却周期内,小于等于此值时,冷却系数为1
- // 找出此文章冷却向前向后小时已发布的文章,计算当前的以及更新向后的
- var createAt = IdWorker.DecodeId(postId).Time;
- var startId = IdWorker.GetStartIdByDate(createAt.AddHours(-postCoolingPeriodHours));
- var endId = IdWorker.GetStartIdByDate(createAt.AddHours(postCoolingPeriodHours));
- if (authorId == 0)
- {
- authorId = await repository.QuerySingleOrDefaultAsync<long>($"select userId from n_post where id={postId}");
- }
- // 未找到记录,则无法计算
- if (authorId == 0) return;
- var postInfos = (await repository.QueryAsync<dynamic>($"select id, coolingFactor, state from n_post where id>{startId} and id<{endId} and userId={authorId} order by id asc")).ToList();
- var beforeCount = 1;
- var isBefore = true;
- var dic = new Dictionary<long, float>();
- for (var i = 0; i < postInfos.Count; i++)
- {
- var id = (long)postInfos[i].id;
- var state = (int)postInfos[i].state;
- if (id != postId)
- {
- if (isBefore)
- {
- if (state == 1) beforeCount++;
- }
- else
- {
- // 如果状态不是通过,则不计算
- if (state == 1)
- {
- var _createAt = IdWorker.DecodeId(id).Time;
- var _startId = IdWorker.GetStartIdByDate(_createAt);
- var tmp = 1;
- for (var j = i; j >= 0; j--)
- {
- if ((long)postInfos[j].id > _startId && postInfos[j].state == 1)
- {
- tmp++;
- }
- }
- float val = 1;
- if (tmp > postIgnoreCoolingMaxCountPerPeriod)
- {
- val = (float)(1 / Math.Pow(tmp, 0.5));
- }
- if (val != (float)postInfos[i].coolingFactor)
- {
- dic[id] = val;
- }
- }
- }
- }
- else
- {
- isBefore = false;
- if (beforeCount >= 1 && state == 1)
- {
- float val = 1;
- // 计算当前的coolingfact
- if (beforeCount > postIgnoreCoolingMaxCountPerPeriod)
- {
- val = (float)(1 / Math.Pow(beforeCount, 0.5));
- }
- if (val != (float)postInfos[i].coolingFactor)
- {
- dic[id] = val;
- }
- }
- }
- }
- if (dic.Count > 0)
- {
- // 更新冷却值
- var list = dic.Select(e => new
- {
- id = e.Key,
- coolingFactor = e.Value
- });
- await repository.QueryAsync<dynamic>("update n_post set coolingFactor=@coolingFactor, updateAt=now() where id=@id", list, null, null, false);
- }
- }
- /// <summary>
- /// 更新所有文章的平台主观策略分、计算客观事实分 以及 热度值(此处不计算文章的冷却系数)
- /// </summary>
- public static async Task UpdateAllPostContentScore(this DbRESTFulRepository repository, ConfigFromDb config)
- {
- var size = 1000;
- var now = DateTime.Now;
- var validEndTime = now.AddHours(-config.PostIgnoreHotRecentHours);
- var endId = IdWorker.GetStartIdByDate(validEndTime);
- var sql =
- $"select p.id, p.userId, p.type, p.categoryType, p.checkCntScore, p.manualExtraScore, p.coolingFactor, u.mpRole, u.superior, u.cntHotCoefficient from n_post as p inner join n_user as u on p.userId=u.id where p.id>@startId and p.id<{endId} and categoryType=1 and p.state=1 and u.state=0 order by p.id asc limit {size}";
- using var conn = repository.ConnectionManager.GetConnection();
- conn.Open();
- var calcTaskId = await repository.QuerySingleOrDefaultAsync<int>(
- "insert into n_post_hot_all_calc_records(startAt) values(@startAt); select last_insert_id() as id;",
- new { startAt = now }, null, null, false);
- var totalCount = 0;
-
- var startId = IdWorker.GetStartIdByDate(
- string.IsNullOrEmpty(config.PostStartRecommendDate)
- ? DateTime.Now.AddDays(-config.PostMaxRecommendDays)
- : DateTime.Parse(config.PostStartRecommendDate)
- );
- var originalStartId = startId;
- while (true)
- {
- var items = (await repository.QueryAsync<dynamic>(sql, new { startId }, conn)).ToList();
- if (items.Count == 0)
- {
- break;
- }
- totalCount += items.Count;
- var itemsCount = items.Count;
- var lastId = items.Last().id;
- // 用来存文章热度值中间值
- var dic = new Dictionary<long, PostHotValue>();
- items.ForEach(e => dic[(long)e.id] = new PostHotValue());
- var itemsInBlackList = new List<dynamic>();
- // 查出在小黑屋的,如果在小黑屋,则热度为0
- var postIdInBlacklist =
- (await repository.QueryAsync<long>(
- $"select b.postId from n_stat_post_blacklist as b left join n_post as p on b.postId=p.id where b.postId>{startId} and b.postId<={lastId} and b.ctrlType>0"))
- .ToList();
- if (postIdInBlacklist.Count > 0)
- {
- var validItems = new List<dynamic>();
- items.ForEach(e =>
- {
- if (!postIdInBlacklist.Contains(e.id))
- {
- validItems.Add(e);
- }
- else
- {
- itemsInBlackList.Add(e);
- }
- });
- items = validItems;
- }
- // 计算当前文章列表的有效阅读量
- // 判断是否是今天的内容,如果是,则需要查出今天的有效阅读量
- var todayStartTime = now.AddHours(-now.Hour).AddMinutes(-now.Minute).AddSeconds(-now.Second)
- .AddMilliseconds(-now.Millisecond);
- var todayStartId = IdWorker.GetStartIdByDate(todayStartTime);
- var todayItems = items.Where(e => e.id > todayStartId && e.id < endId && e.checkCntScore > 0).ToList();
- if (todayItems.Count > 0)
- {
- var todayItemIds = todayItems.Select(e => (long)e.id).ToList();
- #region 统计有效阅读量
- // 统计阅读量
- var resp = await ESHelper.Client.SearchAsync<StringResponse>("postreadrecord",
- PostData.Serializable(new
- {
- query = new
- {
- @bool = new
- {
- must = new List<dynamic>
- {
- new
- {
- range = new
- {
- createAt = new
- {
- gte = todayStartTime.ToString("yyyy-MM-dd HH:mm:ss")
- }
- }
- },
- new
- {
- terms = new
- {
- postId = todayItemIds
- }
- }
- },
- must_not = new List<dynamic>
- {
- new
- {
- term = new { notValidRecord = true }
- }
- }
- }
- },
- size = 0,
- aggs = new
- {
- post = new
- {
- terms = new { field = "postId", size = 50000 }
- }
- }
- }));
- if (resp.Success)
- {
- var rst = JsonConvert.DeserializeObject<JObject>(resp.Body);
- var buckets = rst.Value<JObject>("aggregations").Value<JObject>("post")
- .Value<JArray>("buckets");
- if (buckets.Count > 0)
- {
- buckets.ForEach(e =>
- {
- var postId = e.Value<long>("key");
- var count = e.Value<int>("doc_count") * 3;
- dic[postId].fact += count;
- });
- }
- }
- // 统计分享量
- resp = await ESHelper.Client.SearchAsync<StringResponse>("postsharerecord", PostData.Serializable(
- new
- {
- query = new
- {
- @bool = new
- {
- must = new List<dynamic>
- {
- new
- {
- range = new
- {
- createAt = new
- {
- gte = todayStartTime.ToString("yyyy-MM-dd HH:mm:ss")
- }
- }
- },
- new
- {
- terms = new
- {
- postId = todayItemIds
- }
- }
- },
- must_not = new List<dynamic>
- {
- new
- {
- term = new { notValidRecord = true }
- }
- }
- }
- },
- size = 0,
- aggs = new
- {
- post = new
- {
- terms = new { field = "postId", size = 50000 }
- }
- }
- }));
- if (resp.Success)
- {
- var rst = JsonConvert.DeserializeObject<JObject>(resp.Body);
- var buckets = rst.Value<JObject>("aggregations").Value<JObject>("post")
- .Value<JArray>("buckets");
- if (buckets.Count > 0)
- {
- buckets.ForEach(e =>
- {
- var postId = e.Value<long>("key");
- var count = e.Value<int>("doc_count") * 6;
- dic[postId].fact += count;
- });
- }
- }
- var todayStartTimeStr = todayStartTime.ToString("yyyy-MM-dd HH:mm:ss");
- // 统计有效点赞
- var list = (await repository.QueryAsync<dynamic>(@$"
- SELECT tg.targetId as postId, count(1) as count FROM `n_user_thumbsup_gain` as tg left join n_stat_thumbsup_history as th on tg.targetId=th.postId and tg.userId=th.userId where tg.targetId>{startId} and tg.targetId<{endId} and
- tg.userId<>tg.authorId and tg.type=0 and tg.categoryType=1 and tg.createAt>='{todayStartTimeStr}' and
- th.userId is null
- group by tg.targetId
- ")).ToList();
- if (list.Count > 0)
- {
- list.ForEach(e =>
- {
- if (dic.ContainsKey(e.postId))
- {
- dic[(long)e.postId].fact += e.count * 6;
- }
- });
- }
- // 统计有效收藏
- list = (await repository.QueryAsync<dynamic>(@$"
- SELECT tg.postId, count(1) as count FROM `n_user_collect_post` as tg left join n_stat_collection_history as th on tg.postId=th.postId and tg.userId=th.userId where tg.postId>{startId} and tg.postId<{endId} and
- tg.userId<>tg.authorId and tg.categoryType=1 and tg.createAt>='{todayStartTimeStr}' and
- th.userId is null
- group by tg.postId
- ")).ToList();
- if (list.Count > 0)
- {
- list.ForEach(e =>
- {
- if (dic.ContainsKey(e.postId))
- {
- dic[(long)e.postId].fact += e.count * 6;
- }
- });
- }
- // 统计有效评论
- list = (await repository.QueryAsync<dynamic>(@$"
- select t1.postId, count(1) as count from (
- select distinct t.userId, t.postId from (
- select distinct c.userId, c.postId from n_post_comment as c
- where c.state=0 and c.isAuthor=0 and c.createAt>='{todayStartTimeStr}' and c.postId>{startId} and c.postId<{endId}
- UNION
- select distinct r.userId, r.postId from n_post_reply as r left join n_post_comment as c1 on r.commentId=c1.id
- where r.state=0 and r.isAuthor=0 and r.createAt>='{todayStartTimeStr}' and c1.state=0 and r.postId>{startId} and r.postId<{endId}
- ) as t
- ) as t1 left join n_stat_comment_history as ch on t1.postId=ch.postId and t1.userId=ch.userId
- where ch.postId is null
- group by t1.postId
- ")).ToList();
- if (list.Count > 0)
- {
- list.ForEach(e =>
- {
- if (dic.ContainsKey(e.postId))
- {
- dic[(long)e.postId].fact += e.count * 6;
- }
- });
- }
- #endregion 统计有效阅读量
- }
- // 看是否存在非今日的,如果存在,则需要在每日统计表中查询数据
- var notTodayItems = items.Where(e => e.id < todayStartId && e.id < endId && e.checkCntScore > 0)
- .ToList();
- if (notTodayItems.Count > 0)
- {
- var notTodayItemIds = notTodayItems.Select(e => (long)e.id).ToList();
- // 各表分别统计
- var readStat = await repository.QueryAsync<dynamic>(
- "select postId, sum(normalReadCount+fanReadCount) as count from n_stat_read where postId in @ids group by postId",
- new { ids = notTodayItemIds });
- readStat.ForEach(e => { dic[(long)e.postId].fact += (float)e.count * 3; });
- var commentStat = await repository.QueryAsync<dynamic>(
- "select postId, sum(commentCount) as count from n_stat_comment where postId in @ids group by postId",
- new { ids = notTodayItemIds });
- commentStat.ForEach(e => { dic[(long)e.postId].fact += (float)e.count * 6; });
- var thumbsupStat = await repository.QueryAsync<dynamic>(
- "select postId, sum(thumbsupCount) as count from n_stat_thumbsup where postId in @ids group by postId",
- new { ids = notTodayItemIds });
- thumbsupStat.ForEach(e => { dic[(long)e.postId].fact += (float)e.count * 6; });
- var collectionStat = await repository.QueryAsync<dynamic>(
- "select postId, sum(collectionCount) as count from n_stat_collection where postId in @ids group by postId",
- new { ids = notTodayItemIds });
- collectionStat.ForEach(e => { dic[(long)e.postId].fact += (float)e.count * 6; });
- var shareStat = await repository.QueryAsync<dynamic>(
- "select postId, sum(shareCount) as count from n_stat_share where postId in @ids group by postId",
- new { ids = notTodayItemIds });
- shareStat.ForEach(e => { dic[(long)e.postId].fact += (float)e.count * 6; });
- }
- // 如果客观事实数据最大为3500
- dic.ForEach(e =>
- {
- if (e.Value.fact > 3500) dic[e.Key].fact = 3500;
- });
- var esRecords = new List<dynamic>();
- items.ForEach(e =>
- {
- var id = (long)e.id;
- var roleScore = PostHotValue.GetRoleScore(e.mpRole, e.superior > 0);
- var checkCntScore = (float)e.checkCntScore;
- var cntTypeFactor = e.type > 0 ? 1.3f : 1;
- var coolingFactor = (float)e.coolingFactor;
- var manualExtraScore = (float)e.manualExtraScore;
- var cntHotCoefficient = (float)e.cntHotCoefficient;
- var subjectiveScore = checkCntScore > 0
- ? (roleScore + checkCntScore) * cntTypeFactor * coolingFactor + manualExtraScore
- : 0;
- var days = Math.Ceiling((now - IdWorker.DecodeId(id).Time).TotalDays);
- if (days == 0) days = 1;
- var objectiveScore = dic.ContainsKey(id) ? dic[id].fact : 0;
- var cntHot = checkCntScore > 0
- ? (float)((subjectiveScore + objectiveScore) / Math.Pow(days, 1.6) * cntHotCoefficient)
- : 0;
- esRecords.Add(new
- {
- update = new
- {
- _id = id
- }
- });
- esRecords.Add(new
- {
- doc = new
- {
- subjectiveScore,
- hot = cntHot
- }
- });
- });
- if (esRecords.Count > 0)
- {
- var resp = await ESHelper.Client.BulkAsync<StringResponse>(ESConstants.ESIndexName.Post,
- PostData.MultiJson(esRecords));
- if (!resp.Success)
- {
- LoggerManager.Logger.Error("ES更新笔记热度值失败\r\n" + resp.Body);
- }
- }
- startId = lastId;
- if (itemsCount < size) break;
- }
- if (originalStartId > 0)
- {
- // 将设置过期的文章热度值为0
- var rsp = await ESHelper.Client.UpdateByQueryAsync<StringResponse>(ESConstants.ESIndexName.Post,
- PostData.Serializable(new
- {
- script = new { source = $"ctx._source['hot']=0" },
- query = new
- {
- @bool = new
- {
- must = new List<dynamic>
- {
- new { term = new { categoryType = 1 } },
- new { range = new { hot = new { gt = 0 } } },
- new { range = new { id = new { lt = originalStartId } } },
- }
- }
- }
- }));
- if (!rsp.Success)
- {
- LoggerManager.Logger.Error("ES文章热度归零失败:\r\n" + rsp.Body);
- }
- }
- if (calcTaskId > 0)
- {
- var endAt = DateTime.Now;
- await repository.QueryAsync<dynamic>(
- "update n_post_hot_all_calc_records set endAt=@endAt, totalSecs=@totalSecs, note=@note where id=@id",
- new
- {
- id = calcTaskId,
- endAt,
- totalSecs = (float)(endAt - now).TotalSeconds,
- note = $"处理总计{totalCount}条文章记录"
- }, null, null, false);
- }
- }
- /// <summary>
- /// 计算某些文章的平台主观策略分、客观事实分 以及 热度值(此处不计算文章的冷却系数)
- /// </summary>
- /// <param name="calcReadScore">是否计算客观事件(有效阅读)及综合热度值</param>
- /// <param name="extraManualExtraScore">额外附加的人工干预分数,用来测算合适的人工干预分</param>
- public static async Task<Dictionary<long, PostHotValue>> CalcPostsHot(this DbRESTFulRepository repository, List<long> postIds, ConfigFromDb config, bool calcReadScore = true, float? extraManualExtraScore = null, DateTime? specifyCalcTime = null)
- {
- postIds = postIds.OrderBy(e => e).ToList();
- // 用来存文章热度值中间值
- var dic = new Dictionary<long, PostHotValue>();
- postIds.ForEach(e =>
- {
- dic[e] = new PostHotValue();
- });
- if (postIds.Count == 0)
- {
- return dic;
- }
- var sql = $"select p.id, p.userId, p.type, p.categoryType, p.checkCntScore, p.manualExtraScore, p.coolingFactor, u.mpRole, u.superior, u.cntHotCoefficient from n_post as p left join n_user as u on p.userId=u.id where p.id in @postIds and categoryType=1 and p.state=1 and p.checkCntScore>0 order by p.id asc";
- using var conn = repository.ConnectionManager.GetConnection();
- conn.Open();
- var items = (await repository.QueryAsync<dynamic>(sql, new { postIds }, conn)).ToList();
- if (items.Count == 0)
- {
- return dic;
- }
-
- var calcTime = specifyCalcTime ?? DateTime.Now;
-
- var itemsInBlackList = new List<dynamic>();
- var validPostIds = items.Select(e => (long)e.id).ToList();
- if (validPostIds.Count > 0)
- {
- // 查出在小黑屋的,如果在小黑屋,则热度为0
- var postIdInBlacklist = (await repository.QueryAsync<long>($"select postId from n_stat_post_blacklist where postId in @postIds and ctrlType>0", new { postIds = validPostIds }, conn)).ToList();
- if (postIdInBlacklist.Count > 0)
- {
- var validItems = new List<dynamic>();
- items.ForEach(e =>
- {
- if (postIdInBlacklist.Contains(e.id))
- {
- validItems.Add(e);
- }
- else
- {
- itemsInBlackList.Add(e);
- }
- });
- items = validItems;
- }
- }
- // 综合热度及客观事实分
- // 不计算最近6小时内的
- var validEndTime = calcTime.AddHours(-config.PostIgnoreHotRecentHours);
- var validEndId = IdWorker.GetStartIdByDate(validEndTime);
- var expiredMinId = IdWorker.GetStartIdByDate(
- string.IsNullOrEmpty(config.PostStartRecommendDate)
- ? DateTime.Now.AddDays(-config.PostMaxRecommendDays)
- : DateTime.Parse(config.PostStartRecommendDate)
- );
-
- if (calcReadScore)
- {
- // 计算当前文章列表的有效阅读量
- // 判断是否是今天的内容,如果是,则需要查出今天的有效阅读量
- var todayStartTime = calcTime.AddHours(-calcTime.Hour).AddMinutes(-calcTime.Minute).AddSeconds(-calcTime.Second).AddMilliseconds(-calcTime.Millisecond);
- var todayStartId = IdWorker.GetStartIdByDate(todayStartTime);
- var todayItems = items.Where(e => e.id > todayStartId && e.id < validEndId).ToList();
- if (todayItems.Count > 0)
- {
- var todayItemIds = todayItems.Select(e => (long)e.id).ToList();
- #region 统计有效阅读量
- // 统计阅读量
- var resp = await ESHelper.Client.SearchAsync<StringResponse>("postreadrecord", PostData.Serializable(new
- {
- query = new
- {
- @bool = new
- {
- must = new List<dynamic>
- {
- new
- {
- range = new
- {
- createAt = new
- {
- gte = todayStartTime.ToString("yyyy-MM-dd HH:mm:ss")
- }
- }
- },
- new
- {
- terms = new
- {
- postId = todayItemIds
- }
- }
- },
- must_not = new List<dynamic>
- {
- new
- {
- term = new { notValidRecord = true }
- }
- }
- }
- },
- size = 0,
- aggs = new
- {
- post = new
- {
- terms = new { field = "postId", size = 50000 }
- }
- }
- }));
- if (resp.Success)
- {
- var rst = JsonConvert.DeserializeObject<JObject>(resp.Body);
- var buckets = rst.Value<JObject>("aggregations").Value<JObject>("post").Value<JArray>("buckets");
- if (buckets.Count > 0)
- {
- buckets.ForEach(e =>
- {
- var postId = e.Value<long>("key");
- var count = e.Value<int>("doc_count") * 3;
- dic[postId].fact += count;
- });
- }
- }
- // 统计分享量
- resp = await ESHelper.Client.SearchAsync<StringResponse>("postsharerecord", PostData.Serializable(new
- {
- query = new
- {
- @bool = new
- {
- must = new List<dynamic>
- {
- new
- {
- range = new
- {
- createAt = new
- {
- gte = todayStartTime.ToString("yyyy-MM-dd HH:mm:ss")
- }
- }
- },
- new
- {
- terms = new
- {
- postId = todayItemIds
- }
- }
- },
- must_not = new List<dynamic>
- {
- new
- {
- term = new { notValidRecord = true }
- }
- }
- }
- },
- size = 0,
- aggs = new
- {
- post = new
- {
- terms = new { field = "postId", size = 50000 }
- }
- }
- }));
- if (resp.Success)
- {
- var rst = JsonConvert.DeserializeObject<JObject>(resp.Body);
- var buckets = rst.Value<JObject>("aggregations").Value<JObject>("post").Value<JArray>("buckets");
- if (buckets.Count > 0)
- {
- buckets.ForEach(e =>
- {
- var postId = e.Value<long>("key");
- var count = e.Value<int>("doc_count") * 6;
- dic[postId].fact += count;
- });
- }
- }
- var todayStartTimeStr = todayStartTime.ToString("yyyy-MM-dd HH:mm:ss");
- // 统计有效点赞
- var list = (await repository.QueryAsync<dynamic>(@$"
- SELECT tg.targetId as postId, count(1) as count FROM `n_user_thumbsup_gain` as tg left join n_stat_thumbsup_history as th on tg.targetId=th.postId and tg.userId=th.userId where tg.targetId in @todayItemIds and
- tg.userId<>tg.authorId and tg.type=0 and tg.categoryType=1 and tg.createAt>='{todayStartTimeStr}' and
- th.userId is null
- group by tg.targetId
- ", new { todayItemIds }, conn)).ToList();
- if (list.Count > 0)
- {
- list.ForEach(e =>
- {
- dic[(long)e.postId].fact += e.count * 6;
- });
- }
- // 统计有效收藏
- list = (await repository.QueryAsync<dynamic>(@$"
- SELECT tg.postId, count(1) as count FROM `n_user_collect_post` as tg left join n_stat_collection_history as th on tg.postId=th.postId and tg.userId=th.userId where tg.postId in @todayItemIds and
- tg.userId<>tg.authorId and tg.categoryType=1 and tg.createAt>='{todayStartTimeStr}' and
- th.userId is null
- group by tg.postId
- ", new { todayItemIds }, conn)).ToList();
- if (list.Count > 0)
- {
- list.ForEach(e =>
- {
- if (dic.ContainsKey(e.postId))
- {
- dic[(long)e.postId].fact += e.count * 6;
- }
- });
- }
- // 统计有效评论
- list = (await repository.QueryAsync<dynamic>(@$"
- select t1.postId, count(1) as count from (
- select distinct t.userId, t.postId from (
- select distinct c.userId, c.postId from n_post_comment as c
- where c.state=0 and c.isAuthor=0 and c.createAt>='{todayStartTimeStr}' and c.postId in @todayItemIds
- UNION
- select distinct r.userId, r.postId from n_post_reply as r left join n_post_comment as c1 on r.commentId=c1.id
- where r.state=0 and r.isAuthor=0 and r.createAt>='{todayStartTimeStr}' and c1.state=0 and r.postId in @todayItemIds
- ) as t
- ) as t1 left join n_stat_comment_history as ch on t1.postId=ch.postId and t1.userId=ch.userId
- where ch.postId is null
- group by t1.postId
- ", new { todayItemIds }, conn)).ToList();
- if (list.Count > 0)
- {
- list.ForEach(e =>
- {
- dic[(long)e.postId].fact += e.count * 6;
- });
- }
- #endregion 统计有效阅读量
- }
- // 看是否存在非今日的,如果存在,则需要在每日统计表中查询数据
- var notTodayItems = items.Where(e => e.id < todayStartId && e.id > expiredMinId && e.id < validEndId).ToList();
- if (notTodayItems.Count > 0)
- {
- var notTodayItemIds = notTodayItems.Select(e => (long)e.id).ToList();
- // 各表分别统计
- var readStat = await repository.QueryAsync<dynamic>("select postId, sum(normalReadCount+fanReadCount) as count from n_stat_read where postId in @ids group by postId", new { ids = notTodayItemIds }, conn);
- readStat.ForEach(e =>
- {
- dic[(long)e.postId].fact += (float)e.count * 3;
- });
- var commentStat = await repository.QueryAsync<dynamic>("select postId, sum(commentCount) as count from n_stat_comment where postId in @ids group by postId", new { ids = notTodayItemIds }, conn);
- commentStat.ForEach(e =>
- {
- dic[(long)e.postId].fact += (float)e.count * 6;
- });
- var thumbsupStat = await repository.QueryAsync<dynamic>("select postId, sum(thumbsupCount) as count from n_stat_thumbsup where postId in @ids group by postId", new { ids = notTodayItemIds }, conn);
- thumbsupStat.ForEach(e =>
- {
- dic[(long)e.postId].fact += (float)e.count * 6;
- });
- var collectionStat = await repository.QueryAsync<dynamic>("select postId, sum(collectionCount) as count from n_stat_collection where postId in @ids group by postId", new { ids = notTodayItemIds });
- collectionStat.ForEach(e =>
- {
- dic[(long)e.postId].fact += (float)e.count * 6;
- });
- var shareStat = await repository.QueryAsync<dynamic>("select postId, sum(shareCount) as count from n_stat_share where postId in @ids group by postId", new { ids = notTodayItemIds }, conn);
- shareStat.ForEach(e =>
- {
- dic[(long)e.postId].fact += (float)e.count * 6;
- });
- }
- // 如果客观事实数据最大为3500
- dic.ForEach(e =>
- {
- if (e.Value.fact > 3500) dic[e.Key].fact = 3500;
- });
- }
- items.ForEach(e =>
- {
- var id = (long)e.id;
- var roleScore = PostHotValue.GetRoleScore(e.mpRole, e.superior > 0);
- var checkCntScore = (float)e.checkCntScore;
- var cntTypeFactor = e.type > 0 ? 1.3f : 1;
- var coolingFactor = (float)e.coolingFactor;
- var manualExtraScore = ((float)e.manualExtraScore + (extraManualExtraScore ?? 0));
- var cntHotCoefficient = (float)e.cntHotCoefficient;
- var subjectiveScore = checkCntScore > 0 ? (roleScore + checkCntScore) * cntTypeFactor * coolingFactor + manualExtraScore : 0;
- var expired = e.id < expiredMinId;
- dic[id].subjectiveScore = subjectiveScore;
- dic[id].cntHotCoefficient = cntHotCoefficient;
- if (calcReadScore && !expired && id < validEndId)
- {
- var days = (float)Math.Ceiling((calcTime - IdWorker.DecodeId(id).Time).TotalDays);
- if (days == 0) days = 1;
- var objectiveScore = dic.ContainsKey(id) ? dic[id].fact : 0;
- dic[id].hot = (float)((subjectiveScore + objectiveScore) / Math.Pow(days, 1.6) * cntHotCoefficient);
- }
- });
- return dic;
- }
- /// <summary>
- /// 同步单篇内容
- /// </summary>
- public static async Task SyncPostSingle(this DbRESTFulRepository repository, long id, OSSConfig ossConfig, OssClient ossClient, ConfigFromDb config, bool calcReadScore = true)
- {
- var postInfo = await repository.QuerySingleOrDefaultAsync<dynamic>("select id, state, title, thumbnails, type, originalTopicNames, tags, summary, contentState, videoId, content, createAt, userId, categoryType, onlyInTopic, noMoreCont, resourceCode, primaryType, otherPrimaryTypes, isBest, coverWidth, coverHeight, previewable, reprintSource, reprintLink, checkCntScore from n_post where id=@id", new { id });
- if (postInfo == null)
- {
- await ESHelper.Client.DeleteAsync<StringResponse>(ESConstants.ESIndexName.Post, id.ToString());
- }
- else
- {
- await repository.RecalcPostCoolingFactor(id, (long)postInfo.userId, config);
- await repository.SyncPostList2ES(DateTime.Now, new List<dynamic> { postInfo }, ossConfig, ossClient, config, calcReadScore);
- }
- }
- /// <summary>
- /// 返回最后一条同步的动态Id,如果没有,则返回null。(id, state, title, thumbnails, type, originalTopicNames, tags, summary, contentState, videoId, content, createAt, userId, categoryType, onlyInTopic, noMoreCont, resourceCode, primaryType, otherPrimaryTypes, isBest, coverWidth, coverHeight, previewable, reprintSource, reprintLink, checkCntScore)
- /// </summary>
- public static async Task<long?> SyncPostList2ES(this DbRESTFulRepository repository, DateTime now, List<dynamic> posts, OSSConfig ossConfig, OssClient ossClient, ConfigFromDb config, bool calcReadScore = true)
- {
- if (posts.Count == 0) return null;
- long? lastId = null;
- var userIds = posts.Select(e => e.userId).Distinct().ToList();
- Dictionary<long, string> userDic = null;
- if (userIds.Count > 0)
- {
- var users = (await repository.QueryAsync<dynamic>("select id, alias from n_user where id in @ids", new { ids = userIds })).ToList();
- userDic = new Dictionary<long, string>(users.Count);
- foreach (var item in users)
- {
- userDic[item.id] = item.alias;
- }
- }
- var postTopicNamesDic = new Dictionary<long, List<string>>();
- var allTopicNames = new HashSet<string>();
- posts.Where(e => !string.IsNullOrEmpty((string)e.originalTopicNames)).ForEach(e =>
- {
- var names = ((string)e.originalTopicNames).Split('\n', StringSplitOptions.RemoveEmptyEntries).Distinct().ToList();
- postTopicNamesDic[(long)e.id] = names;
- names.ForEach(n => allTopicNames.Add(n));
- });
- Dictionary<string, dynamic> topicNameInfoDic = new Dictionary<string, dynamic>();
- if (allTopicNames.Count > 0)
- {
- var topics = (await repository.QueryAsync<dynamic>("select id, title, disabled from n_topic where title in @names", new { names = allTopicNames })).ToList();
- topics.ForEach(e =>
- {
- topicNameInfoDic[(string)e.title] = e;
- });
- }
- var postIds = new List<long>();
- var statistic = await repository.QueryAsync<dynamic>("select postId, hot, thumbsup, collected, `comment`, `read` from n_post_statistic where postId in @ids", new { ids = posts.Select(e => e.id) });
- var statisticDic = new Dictionary<long, dynamic>();
- foreach (var item in statistic)
- {
- statisticDic[(long)item.postId] = item;
- }
- var resourceDic = new Dictionary<string, string>();
- var resourceCodes = new HashSet<string>();
- posts.ForEach(e =>
- {
- postIds.Add((long)e.id);
- if (!string.IsNullOrEmpty(e.resourceCode))
- {
- ((string)e.resourceCode).Split(',', StringSplitOptions.RemoveEmptyEntries).ForEach(i => resourceCodes.Add(i));
- }
- });
- if (resourceCodes.Count > 0)
- {
- var resourceInfos = await repository.QueryAsync<dynamic>("select code, name from n_resource where code in @codes", new { codes = resourceCodes.ToList() });
- resourceInfos.ForEach(e =>
- {
- resourceDic.Add(e.code, e.name);
- });
- }
- var hotDic = await repository.CalcPostsHot(postIds, config, calcReadScore);
- var records = new List<dynamic>();
- foreach (var post in posts)
- {
- var postId = (long)post.id;
- records.Add(new
- {
- delete = new { _id = postId }
- });
- // ES只存储审核通过且内容完整(即视频是上传处理完成的)的动态
- if (post.state != 1 || post.contentState != 1) continue;
- records.Add(new
- {
- create = new { _id = postId }
- });
- string tagsStr = post.tags;
- List<string> tags = tagsStr.Split(',', StringSplitOptions.RemoveEmptyEntries).Distinct().ToList();
- var statisticItem = statisticDic.ContainsKey(post.id) ? statisticDic[post.id] : null;
- string content = string.Empty;
- try
- {
- if (post.noMoreCont > 0)
- {
- content = post.content ?? string.Empty;
- }
- else
- {
- content = await FileOperator.GetFileContent("jzq_" + post.id.ToString(), ossConfig.OSSBucket, ossClient);
- }
- }
- catch
- {
- // ignored
- }
- var primaryTypes = new List<string>();
- primaryTypes.Add(post.primaryType.ToString());
- if (!string.IsNullOrEmpty(post.otherPrimaryTypes))
- {
- primaryTypes.AddRange((post.otherPrimaryTypes as string).Split(',', StringSplitOptions.RemoveEmptyEntries));
- }
- var codes = ((string)post.resourceCode).Split(',', StringSplitOptions.RemoveEmptyEntries);
- var validCodes = new List<string>();
- var validNames = new List<string>();
- codes.ForEach(e =>
- {
- if (!resourceDic.TryGetValue(e, out var value)) return;
- validCodes.Add(e);
- validNames.Add(value);
- });
- var topicInfoList = postTopicNamesDic.ContainsKey(postId) && postTopicNamesDic[postId].Count > 0 ? postTopicNamesDic[postId].Where(e => topicNameInfoDic.ContainsKey(e)).Select(e => topicNameInfoDic[e]).ToList() : new List<dynamic>();
- var topicId = 0;
- var topicName = string.Empty;
- int[] topicIds = null;
- int[] validTopicIds = null;
- string[] topicNames = null;
- string[] validTopicNames = null;
- if (topicInfoList.Count > 0)
- {
- topicIds = topicInfoList.Select(e => (int)e.id).ToArray();
- topicNames = topicInfoList.Select(e => (string)e.title).ToArray();
- validTopicIds = topicInfoList.Where(e => (int)e.disabled == 0).Select(e => (int)e.id).ToArray();
- validTopicNames = topicInfoList.Where(e => (int)e.disabled == 0).Select(e => (string)e.title).ToArray();
- topicId = topicInfoList.First().id;
- topicName = topicInfoList.First().name;
- }
- else
- {
- topicIds = Array.Empty<int>();
- topicNames = Array.Empty<string>();
- validTopicIds = Array.Empty<int>();
- validTopicNames = Array.Empty<string>();
- }
- records.Add(new ESPostModel()
- {
- id = postId,
- title = post.title,
- tags = tags.ToArray(),
- thumbnails = post.thumbnails,
- type = (short)post.type,
- userId = post.userId,
- userAlias = userDic != null && userDic.ContainsKey(post.userId) ? userDic[post.userId] : "",
- summary = post.summary,
- contentState = post.contentState,
- videoId = post.videoId,
- content = content,
- createAt = post.createAt.ToString("yyyy-MM-dd HH:mm:ss"),
- subjectiveScore = post.checkCntScore > 0 && hotDic.ContainsKey(postId) ? hotDic[postId].subjectiveScore : 0,
- hot = post.checkCntScore > 0 && hotDic.ContainsKey(postId) ? hotDic[postId].hot : 0,
- topicId = topicId,
- topicName = topicName,
- topicIds = topicIds,
- topicNames = topicNames,
- validTopicIds = validTopicIds,
- validTopicNames = validTopicNames,
- noMoreCont = post.noMoreCont,
- state = (short)post.state,
- categoryType = (short)post.categoryType,
- onlyInTopic = (short)post.onlyInTopic,
- resourceCodes = validCodes.ToArray(),
- resourceNames = validNames.ToArray(),
- primaryType = post.primaryType,
- primaryTypes = primaryTypes.Distinct().ToArray(),
- isBest = post.isBest,
- previewable = post.previewable > 0,
- coverHeight = post.coverHeight,
- coverWidth = post.coverWidth,
- reprintSource = post.reprintSource,
- reprintLink = post.reprintLink,
- });
- lastId = post.id;
- }
- await ESHelper.Client.BulkAsync<StringResponse>(ESConstants.ESIndexName.Post, PostData.MultiJson(records));
- return lastId;
- }
- /// <summary>
- /// 单独给某一篇文章添加有效阅读量
- /// </summary>
- public static async Task AddSingleReadRecord(this DbRESTFulRepository repository, long postId, long authorId, int categoryType, int count = 1)
- {
- if (count == 0) return;
- var esRecrods = new List<dynamic>();
- for (int i = 0; i < count; i++)
- {
- esRecrods.Add(new { create = new { } });
- esRecrods.Add(new ESPostReadRecordModel
- {
- postId = postId,
- authorId = authorId,
- categoryType = categoryType,
- userId = 0,
- isAuthor = false,
- isFan = false,
- isMock = true,
- clientId = "",
- ip = "",
- createAt = DateTime.Now.ToString("yyyy-MM-dd HH:mm:ss"),
- });
- }
- // 存入ES
- await ESHelper.Client.BulkAsync<StringResponse>("postreadrecord", PostData.MultiJson(esRecrods));
- // 更新数据库中统计数据
- await repository.QueryAsync<dynamic>($"update n_post_statistic set `read`=`read`+{count}, `normalRead`=`normalRead`+{count} where postId={postId};", null, null, null, false);
- }
- /// <summary>
- /// 尝试删除某文章对应的热门讨论配置
- /// </summary>
- public static async Task HotDiscussTryToRemove(this DbRESTFulRepository repository, long postId, int? topicId = null)
- {
- var query = topicId > 0 ? $"postId={postId} and topicId={topicId}" : $"postId={postId}";
- using var conn = repository.ConnectionManager.GetConnection(false);
- conn.Open();
- await repository.QueryAsync<dynamic>($"delete from n_post_topic_hot where {query}", null,
- conn);
- await repository.QueryAsync<dynamic>(@$"update n_post_topic_hot_records set removeAt=now()
- where {query} and removeAt is null", null, conn);
- }
- }
- }
|