diff --git a/src/index-tool/pom.xml b/src/index-tool/pom.xml index 6823fa78fb6cdc72ea5fe80ed8ba00179477055b..9c1b0066e6a17b96e67917a3e3b9d5afa651cbd7 100644 --- a/src/index-tool/pom.xml +++ b/src/index-tool/pom.xml @@ -70,6 +70,19 @@ spring-boot-starter-tomcat provided + + + + org.commonmark + commonmark + 0.18.1 + + + org.commonmark + commonmark-ext-gfm-tables + 0.18.1 + + org.springframework.boot spring-boot-starter-test diff --git a/src/index-tool/src/main/java/com/opengauss/config/ElasticSearchConfig.java b/src/index-tool/src/main/java/com/opengauss/config/ElasticSearchConfig.java index 8c13aa2321d9018beb157080f1c0e77a40acfad1..a64800477354813afd4139b9977dad5980f5171c 100644 --- a/src/index-tool/src/main/java/com/opengauss/config/ElasticSearchConfig.java +++ b/src/index-tool/src/main/java/com/opengauss/config/ElasticSearchConfig.java @@ -4,6 +4,7 @@ import org.apache.http.HttpHost; import org.apache.http.auth.AuthScope; import org.apache.http.auth.UsernamePasswordCredentials; import org.apache.http.client.CredentialsProvider; +import org.apache.http.client.config.RequestConfig; import org.apache.http.impl.client.BasicCredentialsProvider; import org.apache.http.nio.conn.ssl.SSLIOSessionStrategy; import org.elasticsearch.client.RestClient; @@ -35,6 +36,9 @@ public class ElasticSearchConfig { @Value("${elasticsearch.port}") private int port; + @Value("${scheduled.cron}") + private String dd; + static TrustManager[] trustAllCerts = new TrustManager[]{new X509TrustManager() { @Override public void checkClientTrusted(X509Certificate[] chain, String authType) throws CertificateException { @@ -54,6 +58,7 @@ public class ElasticSearchConfig { @Bean public RestHighLevelClient restHighLevelClient() { + final CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); credentialsProvider.setCredentials(AuthScope.ANY, new UsernamePasswordCredentials(userName, password)); @@ -71,6 +76,21 @@ public class ElasticSearchConfig { "https")).setHttpClientConfigCallback(httpClientConfigCallback); RestHighLevelClient client = new RestHighLevelClient(builder); return client; + + +// RestHighLevelClient highLevelClient = new RestHighLevelClient( +// RestClient.builder(new HttpHost("127.0.0.1", 9200, "http")) +// .setRequestConfigCallback(new RestClientBuilder.RequestConfigCallback() { +// // 该方法接收一个RequestConfig.Builder对象,对该对象进行修改后然后返回。 +// @Override +// public RequestConfig.Builder customizeRequestConfig( +// RequestConfig.Builder requestConfigBuilder) { +// return requestConfigBuilder.setConnectTimeout(5000 * 1000) // 连接超时(默认为1秒) +// .setSocketTimeout(6000 * 1000);// 套接字超时(默认为30秒)//更改客户端的超时限制默认30秒现在改为100*1000分钟 +// } +// }));// 调整最大重试超时时间(默认为30秒).setMaxRetryTimeoutMillis(60000); +// +// return highLevelClient; } public static class NullHostNameVerifier implements HostnameVerifier { diff --git a/src/index-tool/src/main/java/com/opengauss/controller/Sttt.java b/src/index-tool/src/main/java/com/opengauss/controller/Sttt.java new file mode 100644 index 0000000000000000000000000000000000000000..a987d17bd65cb70042edacaf70d05313b65c0e45 --- /dev/null +++ b/src/index-tool/src/main/java/com/opengauss/controller/Sttt.java @@ -0,0 +1,61 @@ +package com.opengauss.controller; + +import com.opengauss.service.SearchService; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.io.IOUtils; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.ApplicationArguments; +import org.springframework.boot.ApplicationRunner; +import org.springframework.stereotype.Component; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; + + +@Component +@Slf4j +public class Sttt implements ApplicationRunner { + + private static final String[] TYPES = {"docs", "blogs", "events", "news"}; +// private static final String[] TYPES = {"blogs", "events", "news"}; + +// private static final String[] TYPES = {"events"}; + +// private static final String[] TYPES = {"docs"}; + +// private static final String[] TYPES = {"blogs"}; + +// private static final String[] TYPES = {"news"}; + + + @Value("${update.shell}") + private String updateShellPath; + @Autowired + private SearchService searchService; + + + @Override + public void run(ApplicationArguments args) { + + Process process; + try { + log.info("===============开始拉取仓库资源================="); + process = Runtime.getRuntime().exec(updateShellPath); + process.waitFor(); + List result = IOUtils.readLines(process.getInputStream(), StandardCharsets.UTF_8); + log.info(result.toString()); + log.info("===============仓库资源拉取成功================="); + + } catch (IOException | InterruptedException e) { + log.error(e.getMessage()); + } + + for (String type : TYPES) { + searchService.refreshDoc(type); + } + + } + +} \ No newline at end of file diff --git a/src/index-tool/src/main/java/com/opengauss/entity/Article.java b/src/index-tool/src/main/java/com/opengauss/entity/Article.java index d447a3b67c11600dbbbf23b5d80ccd7c3dbe7c94..5dcce7c45fad645c072891cc217e0ed01f471c4f 100644 --- a/src/index-tool/src/main/java/com/opengauss/entity/Article.java +++ b/src/index-tool/src/main/java/com/opengauss/entity/Article.java @@ -18,7 +18,7 @@ import java.io.Serializable; @AllArgsConstructor @NoArgsConstructor @Accessors(chain = true) -@Document(indexName = "gauss_articles", shards = 5, replicas = 1) +@Document(indexName = "chenyang_gauss_articles", shards = 5, replicas = 1) public class Article implements Serializable { private static final long serialVersionUID = 5842476471171664561L; @Id diff --git a/src/index-tool/src/main/java/com/opengauss/entity/Tips.java b/src/index-tool/src/main/java/com/opengauss/entity/Tips.java index 0e1c4ce49ea8317cd8bd4e17e080db078ca6b5fc..d663bd8bfbdb1d91a8da9f16f7960c696aaa81fe 100644 --- a/src/index-tool/src/main/java/com/opengauss/entity/Tips.java +++ b/src/index-tool/src/main/java/com/opengauss/entity/Tips.java @@ -15,7 +15,7 @@ import java.io.Serializable; @AllArgsConstructor @NoArgsConstructor @Accessors(chain = true) -@Document(indexName = "gauss_tips", shards = 5, replicas = 1) +@Document(indexName = "chenyang_gauss_tips", shards = 5, replicas = 1) public class Tips implements Serializable { private static final long serialVersionUID = -5333177350140861244L; diff --git a/src/index-tool/src/main/java/com/opengauss/service/impl/SearchServiceImpl.java b/src/index-tool/src/main/java/com/opengauss/service/impl/SearchServiceImpl.java index de9dfbab953d515c3d7d0bb287433a0432537ffb..80a4ba42460f76743a520db35b6690dd47135235 100644 --- a/src/index-tool/src/main/java/com/opengauss/service/impl/SearchServiceImpl.java +++ b/src/index-tool/src/main/java/com/opengauss/service/impl/SearchServiceImpl.java @@ -3,12 +3,16 @@ package com.opengauss.service.impl; import com.alibaba.fastjson.JSONArray; import com.opengauss.constant.TypeConstants; import com.opengauss.entity.Article; +import com.opengauss.exception.ServiceException; import com.opengauss.repository.ArticleRepository; import com.opengauss.repository.TipsRepository; import com.opengauss.service.SearchService; import com.opengauss.utils.DataFromFileUtil; +import com.opengauss.utils.IdUtil; +import com.opengauss.utils.ParseHtmlUtil; import com.opengauss.vo.SearchCondition; import lombok.extern.slf4j.Slf4j; +import org.apache.commons.io.FileUtils; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.RequestOptions; @@ -32,6 +36,7 @@ import org.springframework.util.StringUtils; import java.io.File; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.*; @Service @@ -54,29 +59,127 @@ public class SearchServiceImpl implements SearchService { @Override @Transactional(rollbackFor = Exception.class) public void refreshDoc(String type) { - log.info(String.format("===============开始解析%s代码=============", type)); - JSONArray array = new JSONArray(); + + File indexFile = new File(basePath + type); + if (!indexFile.exists()) { + log.error(String.format("%s 文件夹不存在", indexFile.getPath())); + log.error("服务器开小差了"); + return; + } + + articleRepository.deleteByType(type); + log.info("删除es - type:" + type); + if (TypeConstants.DOCS.equals(type)) { File file = new File(basePath + type); for (File versionFile : Objects.requireNonNull(file.listFiles())) { - array.addAll(DataFromFileUtil.readFromFile(basePath, versionFile.getName(), type)); + readFromFile(basePath, versionFile.getName(), type); } } else { - array = DataFromFileUtil.readFromFile(basePath, "", type); + readFromFile(basePath, "", type); + } + + log.info("更新数据成功:" + type); + } + + + public void readFromFile(String basePath, String version, String type) { + File indexFile = new File(basePath + type); + if (!indexFile.exists()) { + log.error(String.format("%s 文件夹不存在", indexFile.getPath())); + throw new ServiceException("服务器开小差了"); } - log.info(String.format("===============解析%s代码成功,开始更新es=============", type)); - if (array.size() > 0) { - List
list = array.toJavaList(Article.class); - articleRepository.deleteByType(type); + File[] languageDir; + if (StringUtils.hasText(version)) { + File[] versionFiles = indexFile.listFiles(file -> version.equals(file.getName())); + if (null == versionFiles || versionFiles.length == 0) { + log.error(String.format("%s 文件夹不存在", indexFile.getPath() + File.pathSeparator + version)); + throw new ServiceException("服务器开小差了"); + } + File versionFile = versionFiles[0]; + languageDir = versionFile.listFiles(); + } else { + languageDir = indexFile.listFiles(); + } + JSONArray jsonArray = new JSONArray(); + for (File languageFile : languageDir) { + String lang = languageFile.getName(); + File docFile; + if (TypeConstants.BLOGS.equals(type)) { + File[] files = languageFile.listFiles(fileName -> "post".equals(fileName.getName())); + if (null != files && files.length == 1) { + docFile = files[0]; + } else { + continue; + } + } else { + File[] files = languageFile.listFiles(fileName -> type.equals(fileName.getName())); + if (null != files && files.length == 1) { + System.out.println(type + files[0]); + docFile = files[0]; + } else { + continue; + } + } + + Collection listFiles = FileUtils.listFiles(docFile, new String[]{"md"}, true); + for (File mdFile : listFiles) { + if (!mdFile.getName().startsWith("_")) { + try { + jsonArray.add(DataToMap(basePath, version, type, lang, mdFile)); + } catch (Exception e) { + log.error(e.getMessage()); + } + } + } + + log.info(String.format("===============解析%s%s代码成功,开始更新es=============", type, docFile)); + List
list = jsonArray.toJavaList(Article.class); articleRepository.saveAll(list); + log.info(String.format("===============更新%s%s数据成功=============", type, docFile)); + } + } + + + + + + private Map DataToMap(String basePath, String version, String type, String lang, File mdFile) { + Map data = new HashMap<>(); + String articleName = mdFile.getPath().replace(basePath, "") + .replace("\\\\", "/") + .replace(".md", "") + .replace("/" + lang + "/", "/"); + if (!TypeConstants.DOCS.equals(articleName)) { + articleName = articleName.replaceFirst(type + "/", ""); + } + data.put("id", IdUtil.getId()); + data.put("articleName", articleName); + data.put("path", type); + try { + data.put("textContent", ParseHtmlUtil.parseHtml(FileUtils.readFileToString(mdFile, StandardCharsets.UTF_8))); + } catch (IOException e) { + throw new ServiceException("服务器开小差了"); } - log.info(String.format("===============更新%s数据成功=============", type)); + data.put("title", mdFile.getName().replaceAll(".md", "")); + + data.put("type", type); + + data.put("lang", lang); + + data.put("version---", version); + + return data; } + + + + @Override public Map searchByCondition(SearchCondition condition) throws IOException { int startIndex = (condition.getPage() - 1) * condition.getPageSize(); - SearchRequest request = new SearchRequest("gauss_articles"); + SearchRequest request = new SearchRequest("chenyang_gauss_articles"); SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); BoolQueryBuilder mustBuilder = QueryBuilders.boolQuery(); BoolQueryBuilder shouldBuilder = QueryBuilders.boolQuery(); @@ -157,7 +260,7 @@ public class SearchServiceImpl implements SearchService { @Override public Set searchTips(String lang, String keywords) { try { - SearchRequest searchRequest = new SearchRequest("gauss_tips"); + SearchRequest searchRequest = new SearchRequest("chenyang_gauss_tips"); SearchSourceBuilder sourceBuilder = new SearchSourceBuilder(); sourceBuilder.fetchSource("textTip", ""); BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); diff --git a/src/index-tool/src/main/java/com/opengauss/utils/ParseHtmlUtil.java b/src/index-tool/src/main/java/com/opengauss/utils/ParseHtmlUtil.java index b436ac6bca0b7efbd60c00dec55adc2db7690504..11137de7d23fba83098e7b972668fbb42a6faeed 100644 --- a/src/index-tool/src/main/java/com/opengauss/utils/ParseHtmlUtil.java +++ b/src/index-tool/src/main/java/com/opengauss/utils/ParseHtmlUtil.java @@ -1,5 +1,9 @@ package com.opengauss.utils; +import org.commonmark.node.Node; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.renderer.text.TextContentRenderer; import org.jsoup.Jsoup; import org.jsoup.safety.Whitelist; import org.pegdown.PegDownProcessor; @@ -10,12 +14,13 @@ public class ParseHtmlUtil { private static final String regex = "\\+\\+\\+(.*?)\\+\\+\\+"; public static String parseHtml(String mdStr) { - PegDownProcessor pdp = new PegDownProcessor(Integer.MAX_VALUE); - // 将markdown转成html - String htmlContent = pdp.markdownToHtml(mdStr); - //去除html标签 - htmlContent = Jsoup.clean(htmlContent, Whitelist.none()); - htmlContent = htmlContent.replaceAll(lineRegex, "").replaceAll(regex, ""); - return htmlContent.trim(); + Parser parser = Parser.builder().build(); + Node document = parser.parse(mdStr); + + TextContentRenderer textContentRenderer = TextContentRenderer.builder().build(); + //This is Sparta + return textContentRenderer.render(document).replaceAll(lineRegex, "").replaceAll(regex, ""); + + } }