- [X] simplewiki
- [ ] dewiki
- [ ] ptwiki
- [X] elwiki
- [ ] frwiki
- [ ] enwiki
- [X] arwiki
- [X] viwiki
- [X] cswiki
- [X] bnwiki
WARNING: Before running the commands below, make sure the `data/$WIKI_ID` directory contains up-to-date datasets. Otherwise, you need to rerun `run-pipeline.sh` from the beginning.
```lang=bash
# Runs create_tables.py, copy-sqlite-to-mysql.py and export-tables.py for one
# wiki, in that order. The steps are chained so that a failure stops the
# sequence and the success message is printed only when every step succeeded.

# Wiki to process; edit this line for each run.
WIKI_ID=elwiki
# Directory handed to export-tables.py via --path.
DATASET_PATH="$(pwd)/data/${WIKI_ID}"

# Connection settings: a non-empty value already present in the calling shell
# is kept, otherwise the default shown here is used.
DB_USER=${DB_USER:-research}
DB_DATABASE=${DB_DATABASE:-staging}
DB_HOST=${DB_HOST:-dbstore1005.eqiad.wmnet}
DB_PORT=${DB_PORT:-3350}
DB_READ_DEFAULT_FILE=${DB_READ_DEFAULT_FILE:-/etc/mysql/conf.d/analytics-research-client.cnf}

# The subshell confines the exports to these commands, so pasting this snippet
# into an interactive shell does not change that shell's environment.
# Explicit `&&` is used instead of `set -e`, which is ignored on the left-hand
# side of `||`.
(
  export DB_USER DB_DATABASE DB_HOST DB_PORT DB_READ_DEFAULT_FILE
  python create_tables.py -id "$WIKI_ID" &&
    python copy-sqlite-to-mysql.py -id "$WIKI_ID" &&
    python export-tables.py -id "$WIKI_ID" --path "$DATASET_PATH" &&
    echo "Generated datasets in $DATASET_PATH" &&
    echo "To publish the datasets, run \"WIKI_ID=$WIKI_ID ./publish-datasets.sh\""
) || echo "ERROR: a step failed for $WIKI_ID; the remaining steps were skipped" >&2
```