From b634e8f6aeaa793883c101648d8fc8818d5d74ae Mon Sep 17 00:00:00 2001 From: varac Date: Thu, 24 Oct 2013 16:44:41 +0000 Subject: now using custom dump method, much faster than using the script from python-couchdb --- README.md | 33 +++++++++++++----- couchdb-scripts-defaults.conf | 7 ++++ couchdb_dumpall.sh | 8 +---- couchdb_functions | 80 ++++++++++++++++++++++++++++++++++++++----- 4 files changed, 104 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 1ff2c89..5c39a9b 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,39 @@ Leap Couchdb/Bigcouch scripts ============================= -Issues ------- +Todo +==== -* dump_db() and restore_db() rely on python-couchdb package, - python-couchdb =< 0.8-1 needs to be patched, see - http://code.google.com/p/couchdb-python/issues/detail?id=194 +* move from curl to wget, because it's faster +Prerequisites +============= -Exapmples -========= +use a ~/.netrc file for authentication: + + machine 127.0.0.1 login admin password YOUR_PW + +Examples +======== Use couchdb functions on command line ------------------------------------- . couchdb-scripts-defaults.conf . 
couchdb_functions + + # get all db names get_dbs $URL - restore_db $URL users_replicated $user $pw + + # delete db + delete_db $URL users + + # dump db "users" to stdout + dump_db $URL users + + # Dump db "users" to default backupdir + dump_db_to_file $URL users + + # restore db "users" from default backupdir + restore_db $URL users diff --git a/couchdb-scripts-defaults.conf b/couchdb-scripts-defaults.conf index c0dfd2d..e7ad685 100644 --- a/couchdb-scripts-defaults.conf +++ b/couchdb-scripts-defaults.conf @@ -1,11 +1,18 @@ NETRC_FILE='/etc/couchdb/couchdb.netrc' + OPTS="--netrc-file $NETRC_FILE -HContent-Type:application/json -s" CURL="curl $OPTS " + +# wget needs --auth-no-challenge, see bugs.debian.org/600169 +WGET_OPTS='--auth-no-challenge -q -O -' + + URL='http://127.0.0.1:5984' BACKEND_URL='http://127.0.0.1:5986' DUMPDIR='/var/backups/couchdb' TMPPREFIX='tmp' +# replication uses user=`cat /etc/couchdb/couchdb.netrc | cut -d ' ' -f 4` pw=`cat /etc/couchdb/couchdb.netrc | cut -d ' ' -f 6` auth_url="http://${user}:${pw}@127.0.0.1:5984" diff --git a/couchdb_dumpall.sh b/couchdb_dumpall.sh index 53cfae4..80f15ed 100755 --- a/couchdb_dumpall.sh +++ b/couchdb_dumpall.sh @@ -1,9 +1,5 @@ #!/bin/bash -# dump_db() and restore_db() rely on python-couchdb package, -# python-couchdb =< 0.8-1 needs to be patched, see -# http://code.google.com/p/couchdb-python/issues/detail?id=194 - . couchdb-scripts-defaults.conf . 
couchdb_functions @@ -11,10 +7,8 @@ [ -d $DUMPDIR ] || mkdir $DUMPDIR dbs="`get_dbs $URL`" -#dbs='tickets' # for debugging for db in $dbs do - dump_db ${URL} $db $user $pw - + dump_db_to_file ${URL} $db done diff --git a/couchdb_functions b/couchdb_functions index cf6cb2b..b86ac07 100644 --- a/couchdb_functions +++ b/couchdb_functions @@ -39,7 +39,16 @@ doc_exists () { } -dump_db () { +dump_db_old () { + # couchdb-dump cmd is VERY slow + # we don't use this method in production, + # only left here for speed evaluation + + # dump_db() and restore_db() rely on python-couchdb package, + # python-couchdb =< 0.8-1 needs to be patched, see + # http://code.google.com/p/couchdb-python/issues/detail?id=194 + + local url=$1 local db=$2 local user=$3 @@ -49,19 +58,74 @@ dump_db () { [ -z $dumpdir ] && dumpdir='/var/backups/couchdb' echo "Dumping db \"$db\" to ${dumpdir}/$db" - # couchdb-dump cmd is VERY slow, simply dumping _all_docs?include_docs=true - # to a file is faster with the factor ~60 (!) 
couchdb-dump -u $user -p $pw ${url}/$db > ${dumpdir}/$db #2>/dev/null - # restoring from this will not include _design/User right - #$CURL -X GET "${URL}/${db}/_all_docs?include_docs=true" > ${DUMPDIR}/$db - echo "Dumping _security to ${DUMPDIR}/${db}_security" $CURL -X GET "${URL}/${db}/_security" > ${DUMPDIR}/${db}_security chmod 600 ${dumpdir}/${db}* } +dump_db () { + local url=$1 + local db=$2 + + # old curl options, just for re-evaluating speed + # curl_opts='--netrc-file /etc/couchdb/couchdb.netrc' + + #read -a docs <<< `curl $curl_opts -sS 127.0.0.1:5984/$db/_all_docs | json_pp | sed -ne 's/"id" : "//p' | sed -e 's/",//'` + read -a docs <<< `wget $WGET_OPTS 127.0.0.1:5984/$db/_all_docs | json_pp | sed -ne 's/"id" : "//p' | sed -e 's/",//'` + + count=${#docs[*]} + last=${docs[$(( count - 1 ))]} + + echo '{' + echo '"new_edits":false,' + echo '"docs": [' + for id in "${docs[@]}" + do + # curl $curl_opts -sS 127.0.0.1:5984/$db/$id + wget $WGET_OPTS 127.0.0.1:5984/$db/$id + if [[ $id != $last ]] + then + echo ',' + fi + done + + echo ']' + echo '}' +} + +dump_db_to_file () { + local url=$1 + local db=$2 + local dumpdir=$3 + + [ -z $dumpdir ] && dumpdir='/var/backups/couchdb' + echo "Dumping db \"$db\" to ${dumpdir}/$db" + + dump_db $url $db $dumpdir > ${dumpdir}/$db + chmod 600 ${dumpdir}/${db} + + dump_db_security $url $db $dumpdir +} + +dump_db_security () { + + + local url=$1 + local db=$2 + local dumpdir=$3 + + [ -z $dumpdir ] && dumpdir='/var/backups/couchdb' + + echo "Dumping \"$db\" _security to ${DUMPDIR}/${db}_security" + $CURL -X GET "${URL}/${db}/_security" > ${DUMPDIR}/${db}_security + + chmod 600 ${dumpdir}/${db}_security +} + + get_dbs () { local url=$1 local dbs="`$CURL -X GET "${url}/_all_dbs" | sed 's/[\[",]/ /g' | sed 's/]//'`" @@ -116,9 +180,7 @@ replicate_db () { restore_db () { local url=$1 local db=$2 - local user=$3 - local pw=$4 - local dumpdir=$5 + local dumpdir=$3 [ -z $dumpdir ] && dumpdir='/var/backups/couchdb' # restore with 
couchdb-load only works with an empty db -- cgit v1.2.3