Uživatel:Stardust85/statistika
Vzhled
Kód wikitabulky vygenerován skriptem - počítá pouze existující kategorie. Vznikne-li nová kategorie, musí se sem ručně doplnit (nebo spustit znovu skript).
tabulka
[editovat]
jazyk / druh | subst. | adjekt. | zájm. | čísl. | slovesa | příslovce | předložky | spojky | částice | citosl. | celkem |
---|---|---|---|---|---|---|---|---|---|---|---|
Afrikánština | 30 | 4 | - | - | - | - | - | - | - | - | 34 |
Angličtina | 8 137 | 2 277 | 49 | 38 | 2 155 | 265 | 48 | 36 | 5 | 55 | 13065 |
Arabština | 139 | - | - | - | - | - | - | - | - | - | 139 |
Asturština | 4 | - | - | - | - | - | - | - | - | - | 4 |
Běloruština | 27 | - | - | - | - | - | - | 1 | - | - | 28 |
Bosenština | 9 | - | - | - | - | - | - | - | - | - | 9 |
Bretonština | 18 | - | - | - | - | - | - | - | - | - | 18 |
Bulharština | 36 | - | - | - | - | - | - | 2 | - | - | 38 |
Čečenština | 10 | - | - | - | - | - | - | - | - | - | 10 |
Čeština | 24 339 | 9 391 | 169 | 215 | 3 353 | 1 318 | 94 | 131 | 119 | 370 | 39499 |
Dánština | 141 | 46 | 3 | 24 | 6 | - | - | - | - | - | 220 |
Esperanto | 643 | - | 9 | - | - | 12 | - | - | - | - | 664 |
Estonština | 126 | 8 | - | - | - | - | - | - | - | - | 134 |
Fidžijština | - | - | - | - | 1 | - | - | - | - | - | 1 |
Finština | 1 802 | 314 | - | - | - | 116 | - | - | - | - | 2232 |
Francouzština | 6 371 | 3 226 | 56 | 34 | 2 181 | 198 | - | 19 | - | 45 | 12130 |
Fríština | - | 1 | - | - | - | - | - | - | - | - | 1 |
Galicijština | 47 | - | - | - | - | - | - | - | - | - | 47 |
Hebrejština | 619 | - | - | - | - | - | - | - | - | - | 619 |
Chorvatština | 54 | - | - | - | - | - | - | - | - | - | 54 |
Interlingua | - | - | - | - | - | - | - | - | - | - | 0 |
Irština | 307 | - | - | - | - | - | - | - | - | - | 307 |
Islandština | 152 | - | - | 2 | - | - | - | - | - | - | 154 |
Italština | 1 840 | 252 | 14 | - | 164 | 38 | - | 11 | - | - | 2319 |
Japonština | 2 746 | - | - | - | - | - | - | - | - | - | 2746 |
Kašubština | 136 | - | - | - | - | - | - | - | - | - | 136 |
Katalánština | 143 | 9 | - | - | - | - | - | - | - | - | 152 |
Kečuánština | 5 | - | - | - | - | - | - | - | - | - | 5 |
Krymská tatarština | 10 | - | - | - | - | - | - | - | - | - | 10 |
Latina | 1 422 | 458 | 28 | 18 | 402 | - | - | 26 | - | - | 2354 |
Litevština | 288 | - | - | - | - | - | - | - | 5 | - | 293 |
Lotyština | 255 | - | - | - | - | - | - | - | - | - | 255 |
Maďarština | 552 | 89 | - | - | 85 | - | - | - | - | - | 726 |
Makedonština | - | - | - | - | - | - | - | 1 | - | - | 1 |
Němčina | 8 730 | 1 973 | 44 | 90 | 839 | 266 | 40 | 37 | 9 | - | 12028 |
Nizozemština | 637 | 116 | 15 | - | 113 | - | - | - | - | - | 881 |
Norština | 11 | 2 | 2 | 2 | - | - | - | - | - | - | 17 |
Okcitánština | 85 | - | - | - | - | - | - | - | - | - | 85 |
Oshiwambo | 0 | - | 0 | - | 0 | - | - | - | - | - | 0 |
Polština | 4 114 | 228 | 23 | - | - | 36 | - | 14 | 6 | - | 4421 |
Portugalština | 378 | - | - | - | 55 | - | - | 6 | - | - | 439 |
Romština | 185 | - | 11 | - | - | - | - | - | - | - | 196 |
Rumunština | 87 | - | - | - | - | - | 2 | - | - | - | 89 |
Ruština | 1 620 | 1 359 | 20 | - | 287 | - | - | 7 | - | - | 3293 |
Řečtina | 1 034 | - | - | - | - | - | - | - | - | - | 1034 |
Sanskrt | 31 | - | - | - | - | - | - | - | - | - | 31 |
Skotská gaelština | 15 | - | - | - | - | - | - | - | - | - | 15 |
Slovenština | 1 691 | 804 | 31 | 12 | 175 | 81 | 20 | 18 | - | - | 2832 |
Slovinština | 148 | 15 | - | - | - | 12 | - | - | - | - | 175 |
Slovio | 0 | - | - | - | - | - | - | 0 | - | - | 0 |
Srbština | 1 600 | - | - | - | - | - | - | 3 | - | - | 1603 |
Stará angličtina | 29 | - | - | - | - | - | - | 3 | - | - | 32 |
Staroslověnština | - | - | - | - | - | - | - | 2 | - | - | 2 |
Svahilština | 92 | - | - | - | - | - | - | - | - | - | 92 |
Španělština | 1 643 | 159 | 42 | 53 | 202 | 29 | 9 | 9 | 3 | - | 2149 |
Švédština | 702 | 134 | 49 | 69 | 250 | 93 | 34 | 24 | 8 | 14 | 1377 |
Tádžičtina | - | - | - | - | - | - | 1 | - | - | - | 1 |
Tofalarština | - | - | 1 | - | - | - | - | - | - | - | 1 |
Turečtina | 440 | - | - | - | - | - | - | - | - | - | 440 |
Ukrajinština | 275 | - | - | - | - | - | - | 2 | - | - | 277 |
Velština | 21 | - | 4 | - | - | - | - | - | - | - | 25 |
kód skriptu
[editovat]
#!/bin/bash
# Generates a wikitable with detailed per-language statistics for cs.wiktionary.org.
# See the result at cs.wiktionary.org/wiki/Uživatel:Stardust85/statistika
# Author: Michel Samia (m.samia at seznam.cz)
# Usage: $ ./newstats.sh > wikitable.txt
# WARNING: this script LOADS wikimedia servers by one request per language category
# (as of mid 2009 about 60 GETs), so don't run it too often.

# Clear LANG because the behaviour of sorting, grepping and other tools is
# locale-dependent.
# NOTE(review): LC_ALL / LC_* variables set in the caller's environment take
# precedence over LANG - confirm whether those should be cleared as well.
LANG=

# Base URL that the relative category paths scraped from the index are appended to.
DOMAIN="http://cs.wiktionary.org/wiki"

# Part-of-speech keywords, one per table column, in column order. The separators
# are literal backslash-n sequences; consumers expand them with `echo -e`.
TYPES="substantiva\nadjektiva\nzájmena\nčíslovky\nslovesa\npříslovce\npředložky\nspojky\nčástice\ncitoslovce"

# Working directory holding one file per language. -p keeps a re-run from
# failing when the directory is left over from a previous run.
mkdir -p langs

i=0 # counter
echo -e "\nSTEP 1: Downloading categories" >&2
# Get the list of language categories: every line of the index page that
# carries a CategoryTreeBullet span describes one language category.
wget -q -O- http://cs.wiktionary.org/wiki/Kategorie:Jazykov%C3%A9_kategorie |grep '<span class="CategoryTreeBullet">' |
{
  echo "Next language..." >&2
  # Each line is read into an array and re-joined with single spaces (first
  # character of the default IFS). That collapses runs of blanks exactly like
  # an unquoted expansion would, but without exposing the HTML to pathname
  # expansion; -r keeps backslashes literal.
  while read -r -a words
  do
    line="${words[*]}"

    # Page containing the categories by word type (nouns, verbs...) of the
    # given language: everything after the last "wiki" up to the next quote.
    langHref="$DOMAIN$(printf '%s\n' "$line" | sed -e 's/^.*wiki//' -e 's/".*$//')"

    # Name of the language: the text following the 11th '>' on the line.
    langName="$(printf '%s\n' "$line" | cut -d'>' -f 12 | cut -d'<' -f1)"

    # Without a name there is no file to write to (the target would be the
    # langs/ directory itself), so skip the line instead of fetching anything.
    if [[ -z "$langName" ]]
    then
      echo "skipping line without a language name" >&2
      continue
    fi

    # Create or truncate the per-language file.
    : > "langs/${langName}"
    printf '%s %s\n' "$i" "$langName" >&2
    i=$(( i + 1 ))

    # We now have the URL of the language category; extract its subcategories
    # from that page and store them one per line in langs/$langName,
    # e.g. langs/Afrikánština.
    wget -q -O- "$langHref" | grep '<li><div class="CategoryTreeSection"><div class="CategoryTreeItem">' |
    while read -r -a words2
    do
      line2="${words2[*]}"
      # Strip the HTML tags, the bracketed expand marker and the trailing
      # parenthesised part, leaving the category title.
      printf '%s\n' "$line2" | sed -e 's/<[^>]*>//g' -e 's/\[.*\]//' -e 's/(.*$//'
    done >> "langs/$langName"

    # Pause between requests to go easy on the servers.
    echo -n "sleeping..." >&2
    sleep 1
    echo "OK" >&2
  done
}
######## generating the table ######
# The per-language category lists are expected in langs/ (one file per
# language, one category title per line); turn them into wikitable rows on
# stdout. Progress messages go to stderr.
echo -e "\nSTEP 2: Creating table" >&2
# Abort if the data directory is missing; otherwise the loop below would glob
# over whatever directory the script was started from.
cd langs/ || { echo "cannot enter langs/" >&2; exit 1; }
i=0
for file in *
do
  # With an empty directory the pattern stays a literal "*"; nothing to do then.
  [[ -f "$file" ]] || continue

  echo "$i: $file" >&2
  i=$(( i + 1 ))

  # First cell of the row: the language name.
  echo " | $file"

  # One cell per word type, collecting the operands of the row total on the way.
  sumTerms=()
  while read -r druh
  do
    # Full category title containing this word type; the stored titles start
    # with one blank, which is dropped here.
    completeDruh="$(grep -F -- "$druh" "$file" | sed 's/^ //')"
    if [[ -z "$completeDruh" ]]
    then
      echo ' | -'
    else
      echo " | [[:Kategorie:$completeDruh| {{PAGESINCATEGORY:$completeDruh}}]]"
      # |R requests the raw number so that #expr can add it up.
      sumTerms+=("{{PAGESINCATEGORY:$completeDruh|R}}")
    fi
  done < <(echo -e "${TYPES}")

  # Last cell: bold total of all existing categories of the language.
  echo " | '''{{#expr:"
  if (( ${#sumTerms[@]} == 0 ))
  then
    # No category at all: an empty #expr evaluates to nothing and leaves only
    # stray bold quotes in the cell, so emit an explicit zero.
    echo "0"
  else
    printf '%s\n' "${sumTerms[0]}"
    for term in "${sumTerms[@]:1}"
    do
      printf '+%s\n' "$term"
    done
  fi
  echo "}}'''"
  echo " |-"
done