algolia docsearch-scraper test

This commit is contained in:
Artur Akmalov
2023-07-26 14:59:14 +05:00
parent eefc54ebce
commit 465f49c4db
2 changed files with 51 additions and 3 deletions

View File

@@ -40,8 +40,8 @@ jobs:
name: Algolia updates
runs-on: ubuntu-latest
env:
ALGOLIA_APP_ID: ${{ secrets.ALGOLIA_APP_ID }}
ALGOLIA_API_KEY: ${{ secrets.ALGOLIA_API_KEY }}
APPLICATION_ID: ${{ secrets.ALGOLIA_APP_ID }}
API_KEY: ${{ secrets.ALGOLIA_API_KEY }}
ALGOLIA_INDEX_NAME: ${{ secrets.ALGOLIA_INDEX_NAME }}
steps:
- name: Checkout code
@@ -49,4 +49,4 @@ jobs:
- name: Run Docker image
  # Runs the DocSearch scraper container against algolia_config.json.
  # APPLICATION_ID and API_KEY are exported by the job's `env:` mapping.
  run: |
    docker run hello-world:latest
    # No -it here: CI runners attach no TTY, and `docker run -t` then aborts
    # with "the input device is not a TTY" before the scraper starts.
    # A bare `-e NAME` forwards the variable from the runner's environment,
    # so the secret values are never expanded onto the command line.
    docker run -e APPLICATION_ID -e API_KEY -e "CONFIG=$(jq -r tostring algolia_config.json)" algolia/docsearch-scraper

48
algolia_config.json Normal file
View File

@@ -0,0 +1,48 @@
{
"index_name": "prod_akmalov",
"start_urls": [
"https://akmalov.com/"
],
"sitemap_urls": [
"https://akmalov.com/sitemap.xml"
],
"sitemap_alternate_links": true,
"stop_urls": [],
"selectors": {
"lvl0": {
"selector": "(//ul[contains(@class,'menu__list')]//a[contains(@class, 'menu__link menu__link--sublist menu__link--active')]/text() | //nav[contains(@class, 'navbar')]//a[contains(@class, 'navbar__link--active')]/text())[last()]",
"type": "xpath",
"global": true,
"default_value": "Documentation"
},
"lvl1": "header h1",
"lvl2": "article h2",
"lvl3": "article h3",
"lvl4": "article h4",
"lvl5": "article h5, article td:first-child",
"lvl6": "article h6",
"text": "article p, article li, article td:last-child"
},
"strip_chars": " .,;:#",
"custom_settings": {
"separatorsToIndex": "_",
"attributesForFaceting": [
"language",
"version",
"type",
"docusaurus_tag"
],
"attributesToRetrieve": [
"hierarchy",
"content",
"anchor",
"url",
"url_without_anchor",
"type"
]
},
"conversation_id": [
"833762294"
],
"nb_hits": 9510
}