Uživatel:Stardust85/statistika
Skočit na navigaci
Skočit na vyhledávání
Kód wikitabulky je vygenerován skriptem – počítá pouze existující kategorie. Vznikne-li nová kategorie, musí se sem ručně doplnit (nebo se musí znovu spustit skript).
tabulka[editovat]
jazyk / druh | subst. | adjekt. | zájm. | čísl. | slovesa | příslovce | předložky | spojky | částice | citosl. | celkem |
---|---|---|---|---|---|---|---|---|---|---|---|
Afrikánština | 28 | 4 | - | - | - | - | - | - | - | - | 32 |
Angličtina | 7 881 | 2 119 | 49 | 38 | 2 089 | 256 | 47 | 35 | 5 | 54 | 12573 |
Arabština | 132 | - | - | - | - | - | - | - | - | - | 132 |
Asturština | 4 | - | - | - | - | - | - | - | - | - | 4 |
Běloruština | 26 | - | - | - | - | - | - | 1 | - | - | 27 |
Bosenština | 9 | - | - | - | - | - | - | - | - | - | 9 |
Bretonština | 18 | - | - | - | - | - | - | - | - | - | 18 |
Bulharština | 36 | - | - | - | - | - | - | 2 | - | - | 38 |
Čečenština | 10 | - | - | - | - | - | - | - | - | - | 10 |
Čeština | 23 571 | 9 008 | 144 | 199 | 3 202 | 1 256 | 94 | 129 | 118 | 344 | 38065 |
Dánština | 139 | 46 | 3 | 23 | 6 | - | - | - | - | - | 217 |
Esperanto | 633 | - | 9 | - | - | 12 | - | - | - | - | 654 |
Estonština | 124 | 8 | - | - | - | - | - | - | - | - | 132 |
Fidžijština | - | - | - | - | 1 | - | - | - | - | - | 1 |
Finština | 1 797 | 313 | - | - | - | 116 | - | - | - | - | 2226 |
Francouzština | 6 319 | 3 207 | 56 | 34 | 2 169 | 196 | - | 19 | - | 45 | 12045 |
Fríština | - | 1 | - | - | - | - | - | - | - | - | 1 |
Galicijština | 46 | - | - | - | - | - | - | - | - | - | 46 |
Hebrejština | 609 | - | - | - | - | - | - | - | - | - | 609 |
Chorvatština | 53 | - | - | - | - | - | - | - | - | - | 53 |
Interlingua | - | - | - | - | - | - | - | - | - | - | 0 |
Irština | 306 | - | - | - | - | - | - | - | - | - | 306 |
Islandština | 146 | - | - | 2 | - | - | - | - | - | - | 148 |
Italština | 1 828 | 251 | 13 | - | 163 | 36 | - | 11 | - | - | 2302 |
Japonština | 2 632 | - | - | - | - | - | - | - | - | - | 2632 |
Kašubština | 133 | - | - | - | - | - | - | - | - | - | 133 |
Katalánština | 143 | 8 | - | - | - | - | - | - | - | - | 151 |
Kečuánština | 5 | - | - | - | - | - | - | - | - | - | 5 |
Krymská tatarština | 9 | - | - | - | - | - | - | - | - | - | 9 |
Latina | 1 417 | 455 | 27 | 17 | 399 | - | - | 26 | - | - | 2341 |
Litevština | 222 | - | - | - | - | - | - | - | 4 | - | 226 |
Lotyština | 128 | - | - | - | - | - | - | - | - | - | 128 |
Maďarština | 535 | 89 | - | - | 82 | - | - | - | - | - | 706 |
Makedonština | - | - | - | - | - | - | - | 1 | - | - | 1 |
Němčina | 8 611 | 1 909 | 44 | 90 | 818 | 261 | 40 | 35 | 9 | - | 11817 |
Nizozemština | 603 | 103 | 15 | - | 99 | - | - | - | - | - | 820 |
Norština | 8 | 2 | 2 | 2 | - | - | - | - | - | - | 14 |
Okcitánština | 85 | - | - | - | - | - | - | - | - | - | 85 |
Oshiwambo | 0 | - | 0 | - | 0 | - | - | - | - | - | 0 |
Polština | 4 026 | 207 | 23 | - | - | 36 | - | 14 | 6 | - | 4312 |
Portugalština | 365 | - | - | - | 54 | - | - | 6 | - | - | 425 |
Romština | 182 | - | 11 | - | - | - | - | - | - | - | 193 |
Rumunština | 83 | - | - | - | - | - | 2 | - | - | - | 85 |
Ruština | 1 594 | 1 352 | 20 | - | 282 | - | - | 7 | - | - | 3255 |
Řečtina | 976 | - | - | - | - | - | - | - | - | - | 976 |
Sanskrt | 29 | - | - | - | - | - | - | - | - | - | 29 |
Skotská gaelština | 15 | - | - | - | - | - | - | - | - | - | 15 |
Slovenština | 1 601 | 778 | 31 | 12 | 139 | 77 | 19 | 17 | - | - | 2674 |
Slovinština | 123 | 8 | - | - | - | 9 | - | - | - | - | 140 |
Slovio | 0 | - | - | - | - | - | - | 0 | - | - | 0 |
Srbština | 1 597 | - | - | - | - | - | - | 3 | - | - | 1600 |
Stará angličtina | 29 | - | - | - | - | - | - | 3 | - | - | 32 |
Staroslověnština | - | - | - | - | - | - | - | 2 | - | - | 2 |
Svahilština | 90 | - | - | - | - | - | - | - | - | - | 90 |
Španělština | 1 631 | 158 | 41 | 53 | 199 | 29 | 9 | 9 | 3 | - | 2132 |
Švédština | 699 | 134 | 49 | 69 | 249 | 93 | 34 | 24 | 8 | 14 | 1373 |
Tádžičtina | - | - | - | - | - | - | 1 | - | - | - | 1 |
Tofalarština | - | - | 1 | - | - | - | - | - | - | - | 1 |
Turečtina | 429 | - | - | - | - | - | - | - | - | - | 429 |
Ukrajinština | 251 | - | - | - | - | - | - | 2 | - | - | 253 |
Velština | 20 | - | 4 | - | - | - | - | - | - | - | 24 |
kód skriptu[editovat]
#!/bin/bash
# Generates a wikitable with detailed per-language statistics for cs.wiktionary.org
# (see the result at cs.wiktionary.org/wiki/Uživatel:Stardust85/statistika).
# author: Michel Samia (m.samia at seznam.cz)
# usage: $ ./newstats.sh > wikitable.txt
#   The table rows go to stdout, progress messages go to stderr.
# WARNING: this script LOADS the Wikimedia servers with one request per language
# category (about 60 GETs as of mid 2009), so don't run it too often.

# Sorting, grepping and other things are locale-dependent.
# NOTE: this clears LANG only; LC_ALL / LC_* variables that are set in the
# environment still take precedence over it.
LANG=
DOMAIN="http://cs.wiktionary.org/wiki"
# Word kinds that become table columns, separated by a literal "\n"
# (expanded later with `echo -e`).
TYPES="substantiva\nadjektiva\nzájmena\nčíslovky\nslovesa\npříslovce\npředložky\nspojky\nčástice\ncitoslovce"

# One file per language is collected in langs/; each file holds the names of
# that language's subcategories, one per line.
# -p: do not fail when the directory is left over from a previous run.
# NOTE: files of languages that are no longer listed are not removed.
mkdir -p langs

i=0 # progress counter
echo -e "\nSTEP 1: Downloading categories" >&2

# Get the list of language categories: one HTML line per language.
wget -q -O- "$DOMAIN/Kategorie:Jazykov%C3%A9_kategorie" |
  grep '<span class="CategoryTreeBullet">' |
  {
    echo "Next language..." >&2
    # `read -r -a` keeps backslashes literal and splits the line into words;
    # re-joining the words with single spaces normalizes whitespace the same
    # way an unquoted expansion would, but without pathname expansion.
    while read -r -a words; do
      line="${words[*]}"

      # page containing categories by types of words of the given language
      # (nouns, verbs...): everything after the last "wiki" up to the next quote
      langHref="$DOMAIN$(printf '%s\n' "$line" | sed -e 's/^.*wiki//' -e 's/".*$//')"
      # name of the language: the text between the 11th '>' and the following '<'
      langName="$(printf '%s\n' "$line" | cut -d'>' -f 12 | cut -d'<' -f 1)"

      # The name is used as a file name below, so refuse anything that could
      # not be one (empty) or that would point outside langs/ (contains '/').
      if [[ -z "$langName" || "$langName" == */* ]]; then
        echo "WARNING: skipping entry with unusable language name: '$langName'" >&2
        continue
      fi

      echo "$i $langName" >&2
      i=$(( i + 1 ))

      # We now have the URL of the language's category; extract its
      # subcategories from that page and store them one per line in
      # langs/$langName, e.g. langs/Afrikánština.
      # stdin is redirected so that wget can never consume the outer listing.
      wget -q -O- "$langHref" </dev/null |
        grep '<li><div class="CategoryTreeSection"><div class="CategoryTreeItem">' |
        while read -r -a words2; do
          # strip HTML tags, the "[...]" tree bullet and the trailing "(count ...)"
          printf '%s\n' "${words2[*]}" |
            sed -e 's/<[^>]*>//g' -e 's/\[.*\]//' -e 's/(.*$//'
        done > "langs/$langName"
      if [[ "${PIPESTATUS[0]}" -ne 0 ]]; then
        echo "WARNING: could not download $langHref" >&2
      fi

      # be gentle to the servers: pause between requests
      echo -n "sleeping..." >&2
      sleep 1
      echo "OK" >&2
    done
  }
if [[ "${PIPESTATUS[0]}" -ne 0 ]]; then
  echo "WARNING: could not download the list of language categories" >&2
fi
######## generating the table ######
# now we have all the data and we can generate the table

#######################################
# Prints the wikitable rows for every language file in a directory.
# For each file (named after a language) it emits: the language name cell,
# one cell per word kind (a link showing {{PAGESINCATEGORY:...}}, or "-" when
# the language has no such subcategory) and a bold total built with {{#expr:}},
# followed by the row separator.
# Globals:
#   TYPES (read) - word kinds separated by a literal "\n"
# Arguments:
#   $1 - directory holding one file per language; each file lists that
#        language's subcategory names, one per line
# Outputs:
#   table rows on stdout, progress on stderr
# Returns:
#   1 when the directory does not exist, 0 otherwise
#######################################
generate_table() {
  local dir=$1
  local -a kinds terms
  local path lang kind category term
  local count=0

  if [[ ! -d "$dir" ]]; then
    echo "ERROR: directory '$dir' with the downloaded categories not found" >&2
    return 1
  fi

  # Expand TYPES into one word kind per array element.
  kinds=()
  while read -r kind; do
    [[ -n "$kind" ]] || continue # an empty pattern would match every line
    kinds+=("$kind")
  done < <(echo -e "$TYPES")

  for path in "$dir"/*; do
    # skips non-files and the literal pattern left by an unmatched glob
    [[ -f "$path" ]] || continue
    lang=${path##*/}

    echo "$count: $lang" >&2
    count=$(( count + 1 ))

    printf '%s\n' " | $lang"

    terms=()
    for kind in "${kinds[@]}"; do
      # Full subcategory name containing this word kind (fixed-string match).
      # Only the first match is used: a multi-line value would break the
      # generated wikitext.
      category=$(grep -F -m 1 -- "$kind" "$path" | sed 's/^ //')
      if [[ -z "$category" ]]; then
        printf '%s\n' ' | -'
      else
        printf '%s\n' " | [[:Kategorie:$category| {{PAGESINCATEGORY:$category}}]]"
        # |R = raw number without thousands separators, usable inside #expr
        terms+=("{{PAGESINCATEGORY:$category|R}}")
      fi
    done

    # Total for the language: the sum of all existing categories.
    printf '%s\n' " | '''{{#expr:"
    if (( ${#terms[@]} == 0 )); then
      # An empty {{#expr:}} renders as nothing, which leaves a stray
      # apostrophe in the table; emit an explicit zero instead.
      printf '%s\n' "0"
    else
      printf '%s\n' "${terms[0]}"
      for term in "${terms[@]:1}"; do
        printf '%s\n' "+$term"
      done
    fi
    printf '%s\n' "}}'''"
    printf '%s\n' " |-"
  done
}

echo -e "\nSTEP 2: Creating table" >&2
generate_table langs