{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "![](https://github.com/bigdata-icict/ETL-Dataiku-DSS/raw/master/tutoriais/pcdas_1.5.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Notebook para criação de tabela de indicadores da PNS - módulo O 2013 Acidentes" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Bibliotecas Utilizadas" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Lendo pacotes necessários\n", "library(survey)\n", "library(dplyr)\n", "library(tictoc)\n", "library(foreign)\n", "library(forcats)\n", "library(tidyverse)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Carregando microdados da PNS" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "#Carregando banco de dados\n", "load(\"\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Definição do peso e filtragem de respondentes do questionário" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ " Min. 1st Qu. Median Mean 3rd Qu. Max. \n", " 0.004156 0.243959 0.521557 1.000000 1.147413 31.179597 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#Selecionando registros válidos para o módulo P e calculando peso amostral - summary de verificação\n", "pns2013.1<-pns2013 %>% filter(M001==1)\n", "pns2013.1<-pns2013.1 %>% mutate(peso_morador_selec=((V00291*(60202/145572211))))\n", "pns2013.1<-pns2013.1 %>% filter(!is.na(peso_morador_selec))\n", "summary(pns2013.1$peso_morador_selec)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Criação de variáveis dos indicadores" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Sim
25855
Não
27307
NA's
7040
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[Sim] 25855\n", "\\item[Não] 27307\n", "\\item[NA's] 7040\n", "\\end{description*}\n" ], "text/markdown": [ "Sim\n", ": 25855Não\n", ": 27307NA's\n", ": 7040\n", "\n" ], "text/plain": [ " Sim Não NA's \n", "25855 27307 7040 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Sim
11780
Não
2547
NA's
45875
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[Sim] 11780\n", "\\item[Não] 2547\n", "\\item[NA's] 45875\n", "\\end{description*}\n" ], "text/markdown": [ "Sim\n", ": 11780Não\n", ": 2547NA's\n", ": 45875\n", "\n" ], "text/plain": [ " Sim Não NA's \n", "11780 2547 45875 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Sim
24130
Não
5687
NA's
30385
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[Sim] 24130\n", "\\item[Não] 5687\n", "\\item[NA's] 30385\n", "\\end{description*}\n" ], "text/markdown": [ "Sim\n", ": 24130Não\n", ": 5687NA's\n", ": 30385\n", "\n" ], "text/plain": [ " Sim Não NA's \n", "24130 5687 30385 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
Sim
1910
Não
58292
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[Sim] 1910\n", "\\item[Não] 58292\n", "\\end{description*}\n" ], "text/markdown": [ "Sim\n", ": 1910Não\n", ": 58292\n", "\n" ], "text/plain": [ " Sim Não \n", " 1910 58292 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#Usa sempre cinto quando anda no banco de trás de automóvel. - O002P\n", "pns2013.1 <- pns2013.1 %>% mutate(O002P=ifelse(O005==1, NA, ifelse(O005==2, 1, 2)))\n", "pns2013.1$O002P<-factor(pns2013.1$O002P, levels=c(1,2), labels=c(\"Sim\",\"Não\"))\n", "summary(pns2013.1$O002P)\n", "\n", "#Usa sempre capacete quando dirige motocicleta. - O003P\n", "\n", "pns2013.1 <- pns2013.1 %>% mutate(O003P= ifelse(O007==1, 1, 2))\n", "pns2013.1$O003P<-factor(pns2013.1$O003P, levels=c(1,2), labels=c(\"Sim\",\"Não\"))\n", "summary(pns2013.1$O003P)\n", "\n", "#Usa sempre capacete quando está como passageiro de motocicleta. - O004P\n", "pns2013.1 <- pns2013.1 %>% mutate(O004P= ifelse(O008==1,NA,ifelse(O008==2,1,2)))\n", "pns2013.1$O004P<-factor(pns2013.1$O004P, levels=c(1,2), labels=c(\"Sim\",\"Não\"))\n", "summary(pns2013.1$O004P)\n", "\n", "#Usa sempre capacete quando está como passageiro de motocicleta. - O006P\n", "pns2013.1 <- pns2013.1 %>% mutate(O006P = ifelse(O009==1,1,\n", " ifelse(O009==2,2,2))) \n", "pns2013.1$O006P<-factor(pns2013.1$O006P, levels=c(1,2), labels=c(\"Sim\",\"Não\"))\n", "summary(pns2013.1$O006P)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Definições de abrangências" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Situação urbana ou rural" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
urbano
49245
rural
10957
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[urbano] 49245\n", "\\item[rural] 10957\n", "\\end{description*}\n" ], "text/markdown": [ "urbano\n", ": 49245rural\n", ": 10957\n", "\n" ], "text/plain": [ "urbano rural \n", " 49245 10957 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#Situação Urbano ou Rural\n", "pns2013.1 <- pns2013.1 %>% rename(Sit_Urbano_Rural=V0026)\n", "pns2013.1$Sit_Urbano_Rural<-factor(pns2013.1$Sit_Urbano_Rural, levels=c(1,2), labels=c(\"urbano\", \"rural\"))\n", "summary(pns2013.1$Sit_Urbano_Rural)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Sexo" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Masculino
25920
Feminino
34282
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[Masculino] 25920\n", "\\item[Feminino] 34282\n", "\\end{description*}\n" ], "text/markdown": [ "Masculino\n", ": 25920Feminino\n", ": 34282\n", "\n" ], "text/plain": [ "Masculino Feminino \n", " 25920 34282 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#Sexo\n", "pns2013.1 <- pns2013.1 %>% rename(Sexo=C006)\n", "pns2013.1$Sexo<-factor(pns2013.1$Sexo, levels=c(1,2), labels=c(\"Masculino\", \"Feminino\"))\n", "summary(pns2013.1$Sexo)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### UF" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Rondônia
1694
Acre
1814
Amazonas
2586
Roraima
1591
Pará
2004
Amapá
1332
Tocantins
1515
Maranhão
1774
Piauí
1804
Ceará
2560
Rio Grande do Norte
1691
Paraíba
1943
Pernambuco
2591
Alagoas
1748
Sergipe
1553
Bahia
2641
Minas Gerais
3779
Espírito Santo
1724
Rio de Janeiro
3486
São Paulo
5305
Paraná
3012
Santa Catarina
1623
Rio Grande do Sul
2913
Mato Grosso do Sul
1809
Mato Grosso
1476
Goiás
2423
Distrito Federal
1811
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[Rondônia] 1694\n", "\\item[Acre] 1814\n", "\\item[Amazonas] 2586\n", "\\item[Roraima] 1591\n", "\\item[Pará] 2004\n", "\\item[Amapá] 1332\n", "\\item[Tocantins] 1515\n", "\\item[Maranhão] 1774\n", "\\item[Piauí] 1804\n", "\\item[Ceará] 2560\n", "\\item[Rio Grande do Norte] 1691\n", "\\item[Paraíba] 1943\n", "\\item[Pernambuco] 2591\n", "\\item[Alagoas] 1748\n", "\\item[Sergipe] 1553\n", "\\item[Bahia] 2641\n", "\\item[Minas Gerais] 3779\n", "\\item[Espírito Santo] 1724\n", "\\item[Rio de Janeiro] 3486\n", "\\item[São Paulo] 5305\n", "\\item[Paraná] 3012\n", "\\item[Santa Catarina] 1623\n", "\\item[Rio Grande do Sul] 2913\n", "\\item[Mato Grosso do Sul] 1809\n", "\\item[Mato Grosso] 1476\n", "\\item[Goiás] 2423\n", "\\item[Distrito Federal] 1811\n", "\\end{description*}\n" ], "text/markdown": [ "Rondônia\n", ": 1694Acre\n", ": 1814Amazonas\n", ": 2586Roraima\n", ": 1591Pará\n", ": 2004Amapá\n", ": 1332Tocantins\n", ": 1515Maranhão\n", ": 1774Piauí\n", ": 1804Ceará\n", ": 2560Rio Grande do Norte\n", ": 1691Paraíba\n", ": 1943Pernambuco\n", ": 2591Alagoas\n", ": 1748Sergipe\n", ": 1553Bahia\n", ": 2641Minas Gerais\n", ": 3779Espírito Santo\n", ": 1724Rio de Janeiro\n", ": 3486São Paulo\n", ": 5305Paraná\n", ": 3012Santa Catarina\n", ": 1623Rio Grande do Sul\n", ": 2913Mato Grosso do Sul\n", ": 1809Mato Grosso\n", ": 1476Goiás\n", ": 2423Distrito Federal\n", ": 1811\n", "\n" ], "text/plain": [ " Rondônia Acre Amazonas Roraima \n", " 1694 1814 2586 1591 \n", " Pará Amapá Tocantins Maranhão \n", " 2004 1332 1515 1774 \n", " Piauí Ceará Rio Grande do Norte Paraíba \n", " 1804 2560 1691 1943 \n", " Pernambuco Alagoas Sergipe Bahia \n", " 2591 1748 1553 2641 \n", " Minas Gerais Espírito Santo Rio de Janeiro São Paulo \n", " 3779 1724 3486 5305 \n", " Paraná Santa Catarina Rio Grande do Sul Mato Grosso do Sul \n", " 3012 1623 2913 1809 \n", " Mato Grosso Goiás Distrito Federal \n", " 1476 2423 1811 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#Estados - UFs\n", "pns2013.1 <- pns2013.1 %>% rename(Unidades_da_Federacao=V0001)\n", "pns2013.1$Unidades_da_Federacao<-factor(pns2013.1$Unidades_da_Federacao, levels=c(11,12,13,14,15,16,17,21,22,23,24,25,26,27,28,29,31,32,33,35,41,42,43,50,51,52,53),\n", " label=c(\"Rondônia\",\"Acre\",\"Amazonas\",\"Roraima\",\"Pará\",\"Amapá\",\"Tocantins\",\"Maranhão\",\"Piauí\",\"Ceará\",\n", " \"Rio Grande do Norte\",\"Paraíba\",\"Pernambuco\",\"Alagoas\",\"Sergipe\",\"Bahia\",\n", " \"Minas Gerais\",\"Espírito Santo\",\"Rio de Janeiro\",\"São Paulo\",\n", " \"Paraná\",\"Santa Catarina\",\"Rio Grande do Sul\", \n", " \"Mato Grosso do Sul\",\"Mato Grosso\",\"Goiás\",\"Distrito Federal\"))\n", "summary(pns2013.1$Unidades_da_Federacao)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Grandes Regiões" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Norte
12536
Nordeste
18305
Sudeste
14294
Sul
7548
Centro-Oeste
7519
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[Norte] 12536\n", "\\item[Nordeste] 18305\n", "\\item[Sudeste] 14294\n", "\\item[Sul] 7548\n", "\\item[Centro-Oeste] 7519\n", "\\end{description*}\n" ], "text/markdown": [ "Norte\n", ": 12536Nordeste\n", ": 18305Sudeste\n", ": 14294Sul\n", ": 7548Centro-Oeste\n", ": 7519\n", "\n" ], "text/plain": [ " Norte Nordeste Sudeste Sul Centro-Oeste \n", " 12536 18305 14294 7548 7519 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#Grandes Regiões\n", "pns2013.1 <- pns2013.1 %>% \n", " mutate(GrandesRegioes = fct_collapse(Unidades_da_Federacao, \n", " `Norte` = c(\"Rondônia\",\"Acre\",\"Amazonas\",\"Roraima\",\"Pará\", \"Amapá\",\"Tocantins\"),\n", " `Nordeste` = c(\"Maranhão\", \"Piauí\", \"Ceará\", \"Rio Grande do Norte\", \"Paraíba\",\"Pernambuco\", \"Alagoas\",\"Sergipe\",\"Bahia\"),\n", " `Sudeste` = c(\"Minas Gerais\", \"Espírito Santo\",\"Rio de Janeiro\", \"São Paulo\"), \n", " `Sul` = c(\"Paraná\", \"Santa Catarina\", \"Rio Grande do Sul\"),\n", " `Centro-Oeste`= c(\"Mato Grosso do Sul\",\"Mato Grosso\", \"Goiás\", \"Distrito Federal\")))\n", "summary(pns2013.1$GrandesRegioes)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Faixa Etária" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
18 a 29 anos
14321
30 a 44 anos
20242
45 a 59 anos
14462
60 anos ou mais
11177
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[18 a 29 anos] 14321\n", "\\item[30 a 44 anos] 20242\n", "\\item[45 a 59 anos] 14462\n", "\\item[60 anos ou mais] 11177\n", "\\end{description*}\n" ], "text/markdown": [ "18 a 29 anos\n", ": 1432130 a 44 anos\n", ": 2024245 a 59 anos\n", ": 1446260 anos ou mais\n", ": 11177\n", "\n" ], "text/plain": [ " 18 a 29 anos 30 a 44 anos 45 a 59 anos 60 anos ou mais \n", " 14321 20242 14462 11177 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#Faixas Etárias\n", "pns2013.1 <- pns2013.1 %>% mutate(faixa_idade=cut(C008,\n", " breaks = c(18,30, 45, 60,Inf),\n", " labels = c(\"18 a 29 anos\",\"30 a 44 anos\",\"45 a 59 anos\",\"60 anos ou mais\"), \n", " ordered_result = TRUE, right = FALSE))\n", "summary(pns2013.1$faixa_idade) " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Raça" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Branca
24106
Preta
5631
Parda
29512
NA's
953
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[Branca] 24106\n", "\\item[Preta] 5631\n", "\\item[Parda] 29512\n", "\\item[NA's] 953\n", "\\end{description*}\n" ], "text/markdown": [ "Branca\n", ": 24106Preta\n", ": 5631Parda\n", ": 29512NA's\n", ": 953\n", "\n" ], "text/plain": [ "Branca Preta Parda NA's \n", " 24106 5631 29512 953 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#Raça\n", "pns2013.1 <- pns2013.1 %>% mutate(Raca= ifelse(C009==1, 1, \n", " ifelse(C009==2, 2, \n", " ifelse(C009==4, 3, 9))))\n", "\n", "pns2013.1$Raca<-factor(pns2013.1$Raca, levels=c(1,2,3),labels=c(\"Branca\", \"Preta\", \"Parda\"))\n", "\n", "summary(pns2013.1$Raca)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Renda per capita" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Até 1/2 SM
14256
1/2 até 1 SM
17504
1 até 2 SM
15493
2 até 3 SM
5335
Mais de 3 SM
7603
NA's
11
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[Até 1/2 SM] 14256\n", "\\item[1/2 até 1 SM] 17504\n", "\\item[1 até 2 SM] 15493\n", "\\item[2 até 3 SM] 5335\n", "\\item[Mais de 3 SM] 7603\n", "\\item[NA's] 11\n", "\\end{description*}\n" ], "text/markdown": [ "Até 1/2 SM\n", ": 142561/2 até 1 SM\n", ": 175041 até 2 SM\n", ": 154932 até 3 SM\n", ": 5335Mais de 3 SM\n", ": 7603NA's\n", ": 11\n", "\n" ], "text/plain": [ " Até 1/2 SM 1/2 até 1 SM 1 até 2 SM 2 até 3 SM Mais de 3 SM NA's \n", " 14256 17504 15493 5335 7603 11 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#Rendimento domiciliar per capita\n", "pns2013.1 <- pns2013.1 %>% mutate(rend_per_capita=cut(VDF003,\n", " breaks = c(-Inf,339, 678, 1356, 2034,Inf),\n", " labels=c(\"Até 1/2 SM\",\"1/2 até 1 SM\",\"1 até 2 SM\",\"2 até 3 SM\",\"Mais de 3 SM\"), \n", " ordered_result = TRUE, right = TRUE, na.exclude= TRUE))\n", "\n", "summary(pns2013.1$rend_per_capita)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Escolaridade" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Fundamental incompleto ou equivalente
24083
Médio incompleto ou equivalente
9215
Superior incompleto ou equivalente
19149
Superior completo
7755
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[Fundamental incompleto ou equivalente] 24083\n", "\\item[Médio incompleto ou equivalente] 9215\n", "\\item[Superior incompleto ou equivalente] 19149\n", "\\item[Superior completo] 7755\n", "\\end{description*}\n" ], "text/markdown": [ "Fundamental incompleto ou equivalente\n", ": 24083Médio incompleto ou equivalente\n", ": 9215Superior incompleto ou equivalente\n", ": 19149Superior completo\n", ": 7755\n", "\n" ], "text/plain": [ "Fundamental incompleto ou equivalente Médio incompleto ou equivalente \n", " 24083 9215 \n", " Superior incompleto ou equivalente Superior completo \n", " 19149 7755 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Escolaridade\n", "pns2013.1 <- pns2013.1 %>% mutate(gescol = ifelse(VDD004A %in% 1:2, 1, \n", " ifelse(VDD004A%in% 3:4, 2, \n", " ifelse(VDD004A%in% 5:6, 3,4\n", " ))))\n", "\n", "pns2013.1$gescol<-factor(pns2013.1$gescol, levels=c(1,2,3,4), \n", " labels=c(\"Fundamental incompleto ou equivalente\",\"Médio incompleto ou equivalente\",\n", " \"Superior incompleto ou equivalente\",\"Superior completo\"))\n", "summary(pns2013.1$gescol)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Capital" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Porto Velho
1694
Rio Branco
1814
Manaus
2586
Boa Vista
1591
Belém
2004
Macapá
1332
Palmas
1515
São Luís
1774
Teresina
1804
Fortaleza
2560
Natal
1691
João Pessoa
1943
Recife
2591
Maceió
1748
Aracaju
1553
Salvador
2641
Belo Horizonte
3779
Vitória
1724
Rio de Janeiro
3486
São Paulo
5305
Curitiba
3012
Florianópolis
1623
Porto Alegre
2913
Campo Grande
1809
Cuiabá
1476
Goiânia
2423
Brasília
1811
\n" ], "text/latex": [ "\\begin{description*}\n", "\\item[Porto Velho] 1694\n", "\\item[Rio Branco] 1814\n", "\\item[Manaus] 2586\n", "\\item[Boa Vista] 1591\n", "\\item[Belém] 2004\n", "\\item[Macapá] 1332\n", "\\item[Palmas] 1515\n", "\\item[São Luís] 1774\n", "\\item[Teresina] 1804\n", "\\item[Fortaleza] 2560\n", "\\item[Natal] 1691\n", "\\item[João Pessoa] 1943\n", "\\item[Recife] 2591\n", "\\item[Maceió] 1748\n", "\\item[Aracaju] 1553\n", "\\item[Salvador] 2641\n", "\\item[Belo Horizonte] 3779\n", "\\item[Vitória] 1724\n", "\\item[Rio de Janeiro] 3486\n", "\\item[São Paulo] 5305\n", "\\item[Curitiba] 3012\n", "\\item[Florianópolis] 1623\n", "\\item[Porto Alegre] 2913\n", "\\item[Campo Grande] 1809\n", "\\item[Cuiabá] 1476\n", "\\item[Goiânia] 2423\n", "\\item[Brasília] 1811\n", "\\end{description*}\n" ], "text/markdown": [ "Porto Velho\n", ": 1694Rio Branco\n", ": 1814Manaus\n", ": 2586Boa Vista\n", ": 1591Belém\n", ": 2004Macapá\n", ": 1332Palmas\n", ": 1515São Luís\n", ": 1774Teresina\n", ": 1804Fortaleza\n", ": 2560Natal\n", ": 1691João Pessoa\n", ": 1943Recife\n", ": 2591Maceió\n", ": 1748Aracaju\n", ": 1553Salvador\n", ": 2641Belo Horizonte\n", ": 3779Vitória\n", ": 1724Rio de Janeiro\n", ": 3486São Paulo\n", ": 5305Curitiba\n", ": 3012Florianópolis\n", ": 1623Porto Alegre\n", ": 2913Campo Grande\n", ": 1809Cuiabá\n", ": 1476Goiânia\n", ": 2423Brasília\n", ": 1811\n", "\n" ], "text/plain": [ " Porto Velho Rio Branco Manaus Boa Vista Belém \n", " 1694 1814 2586 1591 2004 \n", " Macapá Palmas São Luís Teresina Fortaleza \n", " 1332 1515 1774 1804 2560 \n", " Natal João Pessoa Recife Maceió Aracaju \n", " 1691 1943 2591 1748 1553 \n", " Salvador Belo Horizonte Vitória Rio de Janeiro São Paulo \n", " 2641 3779 1724 3486 5305 \n", " Curitiba Florianópolis Porto Alegre Campo Grande Cuiabá \n", " 3012 1623 2913 1809 1476 \n", " Goiânia Brasília \n", " 2423 1811 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#Capital\n", "pns2013.1<- pns2013.1 %>% mutate(Capital= fct_collapse(Unidades_da_Federacao,\n", " `Porto Velho`= \"Rondônia\", \n", " `Boa Vista`= \"Roraima\", \n", " `Rio Branco`= \"Acre\", \n", " `Manaus` = \"Amazonas\",\n", " `Belém` = \"Pará\" ,\n", " `Macapá`= \"Amapá\",\n", " `Palmas` = \"Tocantins\",\n", " `São Luís` = \"Maranhão\",\n", " `Teresina`= \"Piauí\" ,\n", " `Fortaleza`= \"Ceará\",\n", " `Natal`= \"Rio Grande do Norte\",\n", " `João Pessoa`= \"Paraíba\",\n", " `Recife`= \"Pernambuco\",\n", " `Maceió`= \"Alagoas\",\n", " `Aracaju`= \"Sergipe\",\n", " `Salvador`= \"Bahia\",\n", " `Belo Horizonte`= \"Minas Gerais\",\n", " `Vitória`= \"Espírito Santo\",\n", " `Rio de Janeiro`= \"Rio de Janeiro\",\n", " `São Paulo`= \"São Paulo\",\n", " `Curitiba`= \"Paraná\",\n", " `Florianópolis`= \"Santa Catarina\",\n", " `Porto Alegre`= \"Rio Grande do Sul\",\n", " `Campo Grande`= \"Mato Grosso do Sul\",\n", " `Cuiabá`= \"Mato Grosso\",\n", " `Goiânia` = \"Goiás\",\n", " `Brasília`= \"Distrito Federal\"))\n", "summary(pns2013.1$Capital)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Criando indicadores" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Filtrando base de indicadores" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/plain": [ " V0024 UPA_PNS peso_morador_selec O002P \n", " Min. :1110011 Min. :1100001 Min. : 0.004156 Sim :25855 \n", " 1st Qu.:2210013 1st Qu.:2200075 1st Qu.: 0.243959 Não :27307 \n", " Median :2951023 Median :2900192 Median : 0.521557 NA's: 7040 \n", " Mean :3035353 Mean :3007819 Mean : 1.000000 \n", " 3rd Qu.:4110111 3rd Qu.:4100002 3rd Qu.: 1.147413 \n", " Max. :5310220 Max. :5300180 Max. :31.179597 \n", " \n", " O003P O004P O006P C008 C009 \n", " Sim :11780 Sim :24130 Sim: 1910 Min. : 18.00 Min. :1.00 \n", " Não : 2547 Não : 5687 Não:58292 1st Qu.: 30.00 1st Qu.:1.00 \n", " NA's:45875 NA's:30385 Median : 41.00 Median :3.00 \n", " Mean : 43.31 Mean :2.61 \n", " 3rd Qu.: 55.00 3rd Qu.:4.00 \n", " Max. :101.00 Max. :9.00 \n", " \n", " V0031 Sit_Urbano_Rural Sexo Unidades_da_Federacao\n", " Min. :1.000 urbano:49245 Masculino:25920 São Paulo : 5305 \n", " 1st Qu.:1.000 rural :10957 Feminino :34282 Minas Gerais : 3779 \n", " Median :2.000 Rio de Janeiro : 3486 \n", " Mean :2.308 Paraná : 3012 \n", " 3rd Qu.:4.000 Rio Grande do Sul: 2913 \n", " Max. :4.000 Bahia : 2641 \n", " (Other) :39066 \n", " GrandesRegioes faixa_idade Raca \n", " Norte :12536 18 a 29 anos :14321 Branca:24106 \n", " Nordeste :18305 30 a 44 anos :20242 Preta : 5631 \n", " Sudeste :14294 45 a 59 anos :14462 Parda :29512 \n", " Sul : 7548 60 anos ou mais:11177 NA's : 953 \n", " Centro-Oeste: 7519 \n", " \n", " \n", " rend_per_capita gescol \n", " Até 1/2 SM :14256 Fundamental incompleto ou equivalente:24083 \n", " 1/2 até 1 SM:17504 Médio incompleto ou equivalente : 9215 \n", " 1 até 2 SM :15493 Superior incompleto ou equivalente :19149 \n", " 2 até 3 SM : 5335 Superior completo : 7755 \n", " Mais de 3 SM: 7603 \n", " NA's : 11 \n", " \n", " Capital \n", " São Paulo : 5305 \n", " Belo Horizonte: 3779 \n", " Rio de Janeiro: 3486 \n", " Curitiba : 3012 \n", " Porto Alegre : 2913 \n", " Salvador : 2641 \n", " (Other) :39066 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#Selecionando variáveis para cálculo de indicadores no survey\n", "pns2013Osurvey<- pns2013.1 %>% select(\"V0024\",\"UPA_PNS\",\"peso_morador_selec\", \"O002P\",\"O003P\",\"O004P\", \"O006P\",\n", " \"C008\",\"C009\",\"V0031\",\"Sit_Urbano_Rural\",\"Sexo\",\"Unidades_da_Federacao\", \"GrandesRegioes\",\n", " \"faixa_idade\", \"Raca\",\"rend_per_capita\",,\"gescol\",\"Capital\")\n", "summary(pns2013Osurvey)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Cria plano amostral complexo" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "#Salvando csv para cálculo de indicadores no survey\n", "path <- \"../dados\"\n", "write.csv(pns2013Osurvey, file.path(path, \"pns2013Osurvey.csv\"))" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "desPNSO=svydesign(id=~UPA_PNS, strat=~V0024, weight=~peso_morador_selec, nest=TRUE, data=pns2013Osurvey)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "#survey design -O002P\n", "desPNSO002P_18=subset(desPNSO, C008>=18 & !is.na(O002P))\n", "desPNSO002P_R18=subset(desPNSO, C008>=18 & !is.na(O002P) & !is.na(Raca))\n", "desPNSO002P_C18=subset(desPNSO, C008>=18 & V0031==1 & !is.na(O002P))\n", "desPNSO002P_RC18=subset(desPNSO, C008>=18 & !is.na(O002P) & !is.na(rend_per_capita))" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "#survey design -O003P\n", "desPNSO003P_18=subset(desPNSO, C008>=18 & !is.na(O003P))\n", "desPNSO003P_R18=subset(desPNSO, C008>=18 & !is.na(O003P) & !is.na(Raca))\n", "desPNSO003P_C18=subset(desPNSO, C008>=18 & V0031==1 & !is.na(O003P))\n", "desPNSO003P_RC18=subset(desPNSO, C008>=18 & !is.na(O003P) & !is.na(rend_per_capita))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "#survey design -O004P\n", "desPNSO004P_18=subset(desPNSO, C008>=18 & !is.na(O004P))\n", "desPNSO004P_R18=subset(desPNSO, C008>=18 & !is.na(O004P) & !is.na(Raca))\n", "desPNSO004P_C18=subset(desPNSO, C008>=18 & V0031==1 & !is.na(O004P))\n", "desPNSO004P_RC18=subset(desPNSO, C008>=18 & !is.na(O004P) & !is.na(rend_per_capita))" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "#survey design -O006P\n", "desPNSO006P_18=subset(desPNSO, C008>=18)\n", "desPNSO006P_R18=subset(desPNSO, C008>=18 & !is.na(Raca))\n", "desPNSO006P_C18=subset(desPNSO, C008>=18 & V0031==1)\n", "desPNSO006P_RC18=subset(desPNSO, C008>=18 & !is.na(rend_per_capita))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Criação da tabela de indicadores\n", "Essa tabela é responsável por unir os indicadores no formato do painel de indicadores" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "matrizIndicadores = data.frame()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Definição de variáveis para iteração dos indicadores" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "ListaIndicadores = c(~O002P,~O003P,~O004P,~O006P)\n", "ListaIndicadoresTexto = c(\"O002P\",\"O003P\",\"O004P\",\"O006P\")\n", "ListaDominios = c(~Sexo,~Raca,~rend_per_capita,~faixa_idade,~Sit_Urbano_Rural,~Unidades_da_Federacao,~GrandesRegioes,~gescol,~Capital)\n", "ListaDominiosTexto = c(\"sexo\",\"raça\",\"rend_per_capita\",\"fx_idade_acid\",\"urb_rur\",\"uf\",\"região\",\"gescol\",\"capital\")\n", "ListaTotais = c('Brasil','Capital')\n", "Ano <- \"2013\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Preenchendo a tabela de indicadores\n", "Essas iterações rodam por indicador, abrangência e por design" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "#Cálculo dos indicadores usando o pacote survey \n", "i <- 0\n", "#Para cada indicador\n", "for( indicador in ListaIndicadores){\n", " i <- i + 1\n", " j <- 1\n", " #Para cada dominio\n", " for (dominio in ListaDominios){\n", " #design especifico para capital que é subconjunto do dataframe total\n", " if (ListaDominiosTexto[j]==\"capital\"){\n", " #designs especificos por variavel que são subconjuntos do dataset total\n", " if(ListaIndicadoresTexto[i] == \"O002P\"){\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO002P_C18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }\n", " else if(ListaIndicadoresTexto[i] == \"O003P\"){\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO003P_C18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }else if(ListaIndicadoresTexto[i] == \"O004P\"){# cv maior do que o aceitável\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO004P_C18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }else{\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO006P_C18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }\n", " #Uso design do subconjunto para raça/cor que inclui preta,branca e parda as outras \n", " #não possuiam dados suficientes para os dominios dos indicadores\n", " }else if (ListaDominiosTexto[j]==\"raça\"){\n", " if(ListaIndicadoresTexto[i] == \"O002P\"){\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO002P_R18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }\n", " else if(ListaIndicadoresTexto[i] == \"O003P\"){\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO003P_R18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }else if(ListaIndicadoresTexto[i] == \"O004P\"){\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO004P_R18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }\n", " else{\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO006P_R18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }\n", " #design geral para o subconjunto maior que 18 anos\n", " }else if (ListaDominiosTexto[j]==\"rend_per_capita\"){\n", " if(ListaIndicadoresTexto[i] == \"O002P\"){\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO002P_RC18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }\n", " else if(ListaIndicadoresTexto[i] == \"O003P\"){\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO003P_RC18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }else if(ListaIndicadoresTexto[i] == \"O004P\"){\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO004P_RC18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }\n", " else{dataframe_indicador<-svyby( indicador , dominio , desPNSO006P_RC18 , svymean,vartype= c(\"ci\", \"cv\"))}\n", " #design geral para o subconjunto maior que 18 anos\n", " }else {\n", " if(ListaIndicadoresTexto[i] == \"O002P\"){\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO002P_18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }\n", " else if(ListaIndicadoresTexto[i] == \"O003P\"){\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO003P_18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }else if(ListaIndicadoresTexto[i] == \"O004P\"){\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO004P_18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }else{\n", " dataframe_indicador<-svyby( indicador , dominio , desPNSO006P_18 , svymean,vartype= c(\"ci\", \"cv\"))\n", " }\n", " }\n", " #União do dataframe de indicadores no formato do painel disponibilizado para PNS\n", " \n", " dataframe_indicador<-data.frame(dataframe_indicador)\n", " \n", " colnames(dataframe_indicador) <- c(\"abr_nome\",\"Sim\",\"Nao\",\"LowerS\",\"LowerN\",\"UpperS\",\"UpperN\",\"cvS\",\"cvN\")\n", " dataframe_indicador$Indicador <- ListaIndicadoresTexto[i]\n", " dataframe_indicador$abr_tipo <- ListaDominiosTexto[j]\n", " dataframe_indicador$Ano <- Ano\n", " dataframe_indicador <- dataframe_indicador %>% select(\"abr_tipo\",\"abr_nome\",\"Ano\",\"Indicador\",\"Sim\",\"LowerS\",\"UpperS\",\"cvS\")\n", " matrizIndicadores <-rbind(matrizIndicadores,dataframe_indicador)\n", " j <- j + 1\n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Criando a tabela pela abrangência total" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "matriz_totais <- data.frame()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Preenchendo a tabela com as abrangencia Brasil e total das capitais" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "i=0\n", "#para cada indicador\n", "for(indicador in ListaIndicadores){\n", " i <- i+1\n", " #para os totais Brasil e total das capitais\n", " for(total in ListaTotais){\n", " #Uso do design que é subconjunto do dataset para cada Capital\n", " if (total == \"Capital\"){\n", " #Indicadores que são subconjunto do dataset tot\n", " if(ListaIndicadoresTexto[i] == \"O002P\"){\n", " dataframe_indicador <- svymean(indicador,desPNSO002P_C18)\n", " }else if(ListaIndicadoresTexto[i] == \"O003P\"){\n", " dataframe_indicador <- svymean(indicador,desPNSO003P_C18)\n", " }else if(ListaIndicadoresTexto[i] == \"O004P\"){\n", " dataframe_indicador <- svymean(indicador,desPNSO004P_C18)\n", " }else{\n", " dataframe_indicador <- svymean(indicador,desPNSO006P_C18)\n", " }\n", " \n", " } else {\n", " if(ListaIndicadoresTexto[i] == \"O002P\"){\n", " dataframe_indicador <- svymean(indicador,desPNSO002P_18)\n", " }else if(ListaIndicadoresTexto[i] == \"O003P\"){\n", " dataframe_indicador <- svymean(indicador,desPNSO003P_18)\n", " }else if(ListaIndicadoresTexto[i] == \"O004P\"){\n", " dataframe_indicador <- svymean(indicador,desPNSO004P_18)\n", " }else{\n", " dataframe_indicador <- svymean(indicador,desPNSO006P_18)\n", " }\n", " }\n", " \n", " intervalo_confianca <- confint(dataframe_indicador)\n", " coeficiente_variacao <- cv(dataframe_indicador)\n", " dataframe_indicador <- cbind(data.frame(dataframe_indicador),data.frame(intervalo_confianca))\n", " dataframe_indicador <- cbind(data.frame(dataframe_indicador),data.frame(coeficiente_variacao))\n", " \n", " dataframe_indicador <- dataframe_indicador %>% \n", " select('mean','X2.5..','X97.5..',coeficiente_variacao) \n", " dataframe_indicador_S <- dataframe_indicador %>% \n", " slice(1)\n", " \n", " colnames(dataframe_indicador_S) <- c('Sim','LowerS','UpperS', 'cvS')\n", " dataframe_indicador_S$Indicador <- ListaIndicadoresTexto[i]\n", " print(ListaIndicadoresTexto[i])\n", " dataframe_indicador_S$abr_tipo <- \"total\"\n", " dataframe_indicador_S$abr_nome <- total\n", " dataframe_indicador_S$Ano <- Ano \n", " print(colnames(dataframe_indicador_S))\n", " dataframe_indicador_S <- dataframe_indicador_S %>% \n", " select(\"abr_tipo\",\"abr_nome\",\"Ano\",\"Indicador\",\"Sim\",\"LowerS\",\"UpperS\",'cvS')\n", " \n", " matriz_totais <-rbind(matriz_totais,dataframe_indicador_S)\n", " \n", " }\n", "}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Unindo tabela de indicadores e de totais" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "matrizIndicadores<-rbind(matrizIndicadores,matriz_totais)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Exportando tabela de indicadores" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "write.table(matrizIndicadores,file=\"\",sep = \";\",dec = \",\",row.names = FALSE)" ] } ], "metadata": { "kernelspec": { "display_name": "R", "language": "R", "name": "ir" }, "language_info": { "codemirror_mode": "r", "file_extension": ".r", "mimetype": "text/x-r-source", "name": "R", "pygments_lexer": "r", "version": "3.6.3" } }, "nbformat": 4, "nbformat_minor": 4 }