![](https://github.com/bigdata-icict/ETL-Dataiku-DSS/raw/master/tutoriais/pcdas_1.5.png)

# Notebook para criação de tabela de indicadores da PNS - S 2019 Pré-natal - Parte 5

## Bibliotecas Utilizadas

In [None]:
#Lendo pacotes necessários
library(survey)
library(ggplot2)
library(dplyr)
library(tictoc)
library(foreign)
library(forcats)
library(tidyverse)
source("utils.R")

## Carregando microdados da PNS

In [2]:
#Loading the database for R version 3.5.0 or later
# Replace the placeholder below with the path to the PNS 2019 RData microdata file.
load("<coloque aqui o caminho para o arquivo dos microdados formato RDATA PNS 2019>")

#Checking the dimensions (number of rows and columns)
# NOTE(review): as written, dim() receives a character string and returns NULL;
# presumably the whole quoted placeholder (quotes included) is meant to be
# replaced by the bare name of the object created by load() - confirm.
dim("<Coloque aqui o nome do arquivo RDATA PNS 2019>")

## Definição do peso e filtragem de respondentes do questionário

In [3]:
# Select the valid records (V0025A == 1), compute the sampling weight by
# rescaling V00291 with the fixed factor 90846 / 168426190, and drop the rows
# whose weight is missing.
# Replace the placeholder with the name of the object loaded from the RData file.
pns2019.1 <- <Coloque aqui o nome do arquivo RDATA> %>%
  filter(V0025A == 1) %>%
  mutate(peso_morador_selec = V00291 * (90846 / 168426190)) %>%
  filter(!is.na(peso_morador_selec))

# Verification summary of the weight distribution
summary(pns2019.1$peso_morador_selec)

    Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
 0.00562  0.26621  0.54401  1.00000  1.12765 61.09981 

## Criação de variáveis dos indicadores

In [4]:
# Outcomes - indicators S020P to S026P
#
# All indicators share one eligibility rule (C006 == 2, C008 >= 18, S068 > 0).
# Eligible rows are coded "Não" unless the indicator-specific condition holds,
# in which case they are coded "Sim"; every other row stays NA.

# Builds the Sim/Não factor for one indicator.
# which() drops NA positions, so rows whose eligibility or condition is missing
# are left untouched by the corresponding assignment.
classifica_sim_nao <- function(elegivel, condicao) {
  codigo <- rep(NA_real_, length(elegivel))
  codigo[which(elegivel)] <- 2
  codigo[which(elegivel & condicao)] <- 1
  factor(codigo, levels = c(1, 2), labels = c("Sim", "Não"))
}

mulher_elegivel <- pns2019.1$C006 == 2 & pns2019.1$C008 >= 18 & pns2019.1$S068 > 0

# 20. Proportion of women whose last delivery took place in a hospital or
#     maternity ward - S020P (S112 equal to 3, 4, 6 or 7)
pns2019.1$S020P <- classifica_sim_nao(
  mulher_elegivel,
  pns2019.1$S112 %in% c(3, 4, 6, 7)
)
summary(pns2019.1$S020P)

# 21. Proportion of women whose last delivery took place at the health
#     facility indicated during prenatal care - S021P
pns2019.1$S021P <- classifica_sim_nao(mulher_elegivel, pns2019.1$S128 == 1)
summary(pns2019.1$S021P)


# 22. Proportion of women whose last delivery took place at the first health
#     facility they sought - S022P
pns2019.1$S022P <- classifica_sim_nao(
  mulher_elegivel,
  pns2019.1$S128 == 1 | pns2019.1$S129 == 1
)
summary(pns2019.1$S022P)

# 23. Proportion of women whose last delivery was vaginal/normal - S023P
pns2019.1$S023P <- classifica_sim_nao(mulher_elegivel, pns2019.1$S115 == 1)
summary(pns2019.1$S023P)

# 24. Proportion of women who had a companion during the delivery period - S024P
pns2019.1$S024P <- classifica_sim_nao(mulher_elegivel, pns2019.1$S123 == 1)
summary(pns2019.1$S024P)

# 25. Proportion of women with self-reported gestational age below 37 weeks
#     at the time of delivery - S025P
pns2019.1$S025P <- classifica_sim_nao(mulher_elegivel, pns2019.1$S11801 < 37)
summary(pns2019.1$S025P)

# 26. Proportion of live births whose birth weight was below 2500 g - S026P
pns2019.1$S026P <- classifica_sim_nao(mulher_elegivel, pns2019.1$S11001 < 2500)
summary(pns2019.1$S026P)

## Definições de abrangências

### Situação urbana ou rural

In [5]:
# Urban or rural situation: V0026 is renamed to urb_rur and recoded
# (1 = "urbano", 2 = "rural") as a factor.
pns2019.1 <- pns2019.1 %>%
  rename(urb_rur = V0026) %>%
  mutate(
    urb_rur = factor(urb_rur, levels = c(1, 2), labels = c("urbano", "rural"))
  )
summary(pns2019.1$urb_rur)

### UF

In [6]:
# States (UFs): V0001 is renamed to uf and its numeric codes are mapped to the
# state names, in the order listed below.
pns2019.1 <- pns2019.1 %>% rename(uf = V0001)
pns2019.1$uf <- factor(
  pns2019.1$uf,
  levels = c(
    11, 12, 13, 14, 15, 16, 17,
    21, 22, 23, 24, 25, 26, 27, 28, 29,
    31, 32, 33, 35,
    41, 42, 43,
    50, 51, 52, 53
  ),
  # Argument name written in full; the original `label =` only worked through
  # partial argument matching.
  labels = c(
    "Rondônia", "Acre", "Amazonas", "Roraima", "Pará", "Amapá", "Tocantins",
    "Maranhão", "Piauí", "Ceará", "Rio Grande do Norte", "Paraíba",
    "Pernambuco", "Alagoas", "Sergipe", "Bahia",
    "Minas Gerais", "Espírito Santo", "Rio de Janeiro", "São Paulo",
    "Paraná", "Santa Catarina", "Rio Grande do Sul",
    "Mato Grosso do Sul", "Mato Grosso", "Goiás", "Distrito Federal"
  )
)
summary(pns2019.1$uf)

### Grandes Regiões

In [7]:
# Major regions: the state levels of uf are collapsed into the five regions.
pns2019.1$região <- fct_collapse(
  pns2019.1$uf,
  "Norte" = c(
    "Rondônia", "Acre", "Amazonas", "Roraima", "Pará", "Amapá", "Tocantins"
  ),
  "Nordeste" = c(
    "Maranhão", "Piauí", "Ceará", "Rio Grande do Norte", "Paraíba",
    "Pernambuco", "Alagoas", "Sergipe", "Bahia"
  ),
  "Sudeste" = c(
    "Minas Gerais", "Espírito Santo", "Rio de Janeiro", "São Paulo"
  ),
  "Sul" = c("Paraná", "Santa Catarina", "Rio Grande do Sul"),
  "Centro-Oeste" = c(
    "Mato Grosso do Sul", "Mato Grosso", "Goiás", "Distrito Federal"
  )
)
summary(pns2019.1$região)

### Capital

In [8]:
# Capital: every state level of uf is relabelled with the name of its capital
# city (one state per capital, so the number of levels does not change).
pns2019.1$capital <- fct_collapse(
  pns2019.1$uf,
  "Porto Velho" = "Rondônia",
  "Boa Vista" = "Roraima",
  "Rio Branco" = "Acre",
  "Manaus" = "Amazonas",
  "Belém" = "Pará",
  "Macapá" = "Amapá",
  "Palmas" = "Tocantins",
  "São Luís" = "Maranhão",
  "Teresina" = "Piauí",
  "Fortaleza" = "Ceará",
  "Natal" = "Rio Grande do Norte",
  "João Pessoa" = "Paraíba",
  "Recife" = "Pernambuco",
  "Maceió" = "Alagoas",
  "Aracaju" = "Sergipe",
  "Salvador" = "Bahia",
  "Belo Horizonte" = "Minas Gerais",
  "Vitória" = "Espírito Santo",
  "Rio de Janeiro" = "Rio de Janeiro",
  "São Paulo" = "São Paulo",
  "Curitiba" = "Paraná",
  "Florianópolis" = "Santa Catarina",
  "Porto Alegre" = "Rio Grande do Sul",
  "Campo Grande" = "Mato Grosso do Sul",
  "Cuiabá" = "Mato Grosso",
  "Goiânia" = "Goiás",
  "Brasília" = "Distrito Federal"
)
summary(pns2019.1$capital)

### Faixa Etária

In [9]:
# Age groups

# C008 is binned into left-closed intervals [18,25), [25,30), [30,40), [40,120);
# values outside the breaks (e.g. below 18) become NA.
pns2019.1$fx_idade_S <- cut(
  pns2019.1$C008,
  breaks = c(18, 25, 30, 40, 120),
  labels = c("18 a 24 anos", "25 a 29 anos", "30 a 39 anos", "40 anos ou mais"),
  right = FALSE,
  ordered_result = TRUE
)
summary(pns2019.1$fx_idade_S)

### Raça

In [10]:
# Race
# C009 codes 1, 2 and 4 are labelled "Branca", "Preta" and "Parda"; every other
# code (and missing values) ends up as NA because it is not among the levels.
pns2019.1$raça <- factor(
  pns2019.1$C009,
  levels = c(1, 2, 4),
  labels = c("Branca", "Preta", "Parda")
)
summary(pns2019.1$raça)

### Renda per capita

In [11]:
# Household income per capita
# VDF004 codes 1-2 -> band 1, 3 -> 2, 4 -> 3, 5 -> 4, missing -> NA,
# any other code -> band 5.
pns2019.1 <- pns2019.1 %>%
  mutate(
    rend_per_capita = case_when(
      VDF004 %in% 1:2 ~ 1,
      VDF004 %in% 3 ~ 2,
      VDF004 %in% 4 ~ 3,
      VDF004 %in% 5 ~ 4,
      is.na(VDF004) ~ NA_real_,
      TRUE ~ 5
    )
  )
pns2019.1$rend_per_capita <- factor(
  pns2019.1$rend_per_capita,
  levels = c(1, 2, 3, 4, 5),
  labels = c(
    "Até 1/2 SM", "1/2 até 1 SM", "1 até 2 SM", "2 até 3 SM", "Mais de 3 SM"
  )
)
summary(pns2019.1$rend_per_capita)

### Escolaridade

In [12]:
# Schooling
# VDD004A codes 1-2 -> level 1, 3-4 -> level 2, 5-6 -> level 3, any other
# non-missing code -> level 4.
# A missing VDD004A is kept as NA: `%in%` never returns NA, so without the
# explicit is.na() branch missing values would fall through to level 4
# ("Superior completo").
pns2019.1 <- pns2019.1 %>%
  mutate(
    gescol = case_when(
      VDD004A %in% 1:2 ~ 1,
      VDD004A %in% 3:4 ~ 2,
      VDD004A %in% 5:6 ~ 3,
      is.na(VDD004A) ~ NA_real_,
      TRUE ~ 4
    )
  )

pns2019.1$gescol <- factor(
  pns2019.1$gescol,
  levels = c(1, 2, 3, 4),
  labels = c(
    "Fundamental incompleto ou equivalente", "Médio incompleto ou equivalente",
    "Superior incompleto ou equivalente", "Superior completo"
  )
)
summary(pns2019.1$gescol)

## Criando indicadores

### Filtrando base de indicadores

In [13]:
# Keep only the columns used to compute the indicators with the survey package:
# design variables, weight, raw filters, domain variables and the indicators.
pns2019Ssurvey <- pns2019.1 %>%
  select(all_of(c(
    "V0024", "UPA_PNS", "peso_morador_selec", "C008", "C006", "C009", "V0031",
    "urb_rur", "uf", "região", "capital", "fx_idade_S", "raça",
    "rend_per_capita", "gescol",
    "S020P", "S021P", "S022P", "S023P", "S024P", "S025P", "S026P", "S068"
  )))
summary(pns2019Ssurvey)

     V0024            UPA_PNS      peso_morador_selec      C008       
 1210010: 1167   140001681:   18   Min.   : 0.00562   Min.   : 15.00  
 1410011:  792   140003815:   18   1st Qu.: 0.26621   1st Qu.: 32.00  
 2710111:  779   140005777:   18   Median : 0.54401   Median : 45.00  
 2410011:  745   140006746:   18   Mean   : 1.00000   Mean   : 46.39  
 5010011:  738   140007081:   18   3rd Qu.: 1.12765   3rd Qu.: 60.00  
 3210011:  711   140007715:   18   Max.   :61.09981   Max.   :107.00  
 (Other):85914   (Other)  :90738                                      
      C006            C009           V0031         urb_rur     
 Min.   :1.000   Min.   :1.000   Min.   :1.000   urbano:69873  
 1st Qu.:1.000   1st Qu.:1.000   1st Qu.:1.000   rural :20973  
 Median :2.000   Median :4.000   Median :2.000                 
 Mean   :1.529   Mean   :2.679   Mean   :2.605                 
 3rd Qu.:2.000   3rd Qu.:4.000   3rd Qu.:4.000                 
 Max.   :2.000   Max.   :9.000   Max.   :4.000  

### Exporta tabela filtrada com os dados específicos - Módulo S - Parte 5 2019

In [14]:
# Save the filtered table as CSV for the survey-based indicator computation.
# Replace the placeholder with the output directory.
diretorio_saida <- "<coloque aqui o diretório>"
write.csv(
  x = pns2019Ssurvey,
  file = file.path(diretorio_saida, "pns2019Ssurvey.csv")
)

### Cria plano amostral complexo

In [15]:
# Complex sampling design: UPA_PNS supplies the cluster ids, V0024 the strata
# and peso_morador_selec the weights; nest = TRUE treats cluster ids as nested
# within strata.
# Argument names are written in full (ids / strata / weights) instead of
# relying on partial matching of `id`, `strat` and `weight`.
desPNS <- svydesign(
  ids = ~UPA_PNS,
  strata = ~V0024,
  weights = ~peso_morador_selec,
  nest = TRUE,
  data = pns2019Ssurvey
)

In [16]:
# Survey designs for indicators S020P to S026P.
# Every subset keeps the rows with C006 == 2, C008 >= 18 and S068 > 0; the
# variants add one extra restriction each.

# Base design
desPNSS <- subset(desPNS, C006 == 2 & C008 >= 18 & S068 > 0)

# Additionally restricted to V0031 == 1 (used for the "capital" domain)
desPNSS_C <- subset(desPNS, C006 == 2 & C008 >= 18 & S068 > 0 & V0031 == 1)

# Additionally restricted to rows with a non-missing raça
desPNSS_R <- subset(desPNS, C006 == 2 & C008 >= 18 & S068 > 0 & !is.na(raça))

# Additionally restricted to rows with a non-missing rend_per_capita
desPNSS_Rend <- subset(
  desPNS,
  C006 == 2 & C008 >= 18 & S068 > 0 & !is.na(rend_per_capita)
)

#### Definição de variáveis para iteração dos indicadores

In [17]:
# Builds the per-indicator list of survey designs: one design for each domain
# that needs its own subset (capital, raça, rend_per_capita) plus the `default`
# design. When `excluir` is given it is appended as the last element.
# NOTE(review): how `default` and `excluir` are consumed is decided inside
# popula_indicadores(), which is not visible here - presumably `excluir` lists
# the domains skipped for that indicator; confirm in utils.R.
monta_designs <- function(excluir = NULL) {
  designs <- list(
    capital = desPNSS_C,
    raça = desPNSS_R,
    rend_per_capita = desPNSS_Rend,
    default = desPNSS
  )
  if (!is.null(excluir)) {
    designs$excluir <- excluir
  }
  designs
}

design_por_abrangencia <- list(
  S020P = monta_designs(),
  S021P = monta_designs(),
  S022P = monta_designs(),
  S023P = monta_designs(excluir = c("capital")),
  S024P = monta_designs(),
  S025P = monta_designs(excluir = c("capital", "uf")),
  S026P = monta_designs(excluir = c("capital", "uf"))
)

# Domains (one-sided formulas) over which the indicators are broken down
dominios <- c(
  ~raça,
  ~rend_per_capita,
  ~fx_idade_S,
  ~urb_rur,
  ~uf,
  ~região,
  ~capital,
  ~gescol
)

# Indicators to compute
indicadores <- c(~S020P, ~S021P, ~S022P, ~S023P, ~S024P, ~S025P, ~S026P)

# Overall totals
totais <- c(~Brasil, ~Capital)

# Reference year written to the output table
Ano <- "2019"

#### Preenchendo a tabela de indicadores
Essas iterações rodam por indicador, abrangência e por design

In [18]:
# Fill the indicator table from the designs, domains, indicators and year.
# NOTE(review): popula_indicadores() is not defined in this notebook;
# presumably it is provided by utils.R - confirm its contract there.
matriz_indicadores <- popula_indicadores(design_por_abrangencia, dominios, indicadores, Ano)

In [19]:
# Display the indicator table computed by domain
matriz_indicadores

Unnamed: 0_level_0,abr_tipo,abr_nome,Ano,Indicador,Sim,LowerS,UpperS,cvS
Unnamed: 0_level_1,<chr>,<fct>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Branca,raça,Branca,2019,S020P,0.9462652,0.9150585,0.9774720,0.016826270
Preta,raça,Preta,2019,S020P,0.9491546,0.9190134,0.9792958,0.016202236
Parda,raça,Parda,2019,S020P,0.9406377,0.9204789,0.9607964,0.010934363
Até 1/2 SM,rend_per_capita,Até 1/2 SM,2019,S020P,0.9296724,0.9109426,0.9484022,0.010279112
1/2 até 1 SM,rend_per_capita,1/2 até 1 SM,2019,S020P,0.9413991,0.9069147,0.9758836,0.018689675
1 até 2 SM,rend_per_capita,1 até 2 SM,2019,S020P,0.9616164,0.9246481,0.9985846,0.019614569
2 até 3 SM,rend_per_capita,2 até 3 SM,2019,S020P,0.9887464,0.9716482,1.0058446,0.008823016
Mais de 3 SM,rend_per_capita,Mais de 3 SM,2019,S020P,0.9642104,0.9161889,1.0122318,0.025410618
18 a 24 anos,fx_idade_s,18 a 24 anos,2019,S020P,0.9495413,0.9244755,0.9746072,0.013468538
25 a 29 anos,fx_idade_s,25 a 29 anos,2019,S020P,0.9529889,0.9344372,0.9715406,0.009932271


#### Preenchendo a tabela com as abrangências Brasil e total das capitais

In [20]:
# Same call as for the domains, but over `totais` and with "total" passed as
# the fifth argument.
# NOTE(review): the meaning of that argument is defined in popula_indicadores()
# (not visible here); presumably it is the value written to abr_tipo - confirm.
matriz_totais <- popula_indicadores(design_por_abrangencia, totais, indicadores, Ano, "total")

In [21]:
# Display the table of totals
matriz_totais

Unnamed: 0_level_0,abr_tipo,abr_nome,Ano,Indicador,Sim,LowerS,UpperS,cvS
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
S020PSim,total,Brasil,2019,S020P,0.94288075,0.92716331,0.9585982,0.008505053
S020PSim1,total,Capital,2019,S020P,0.93159431,0.91045976,0.9527289,0.011574921
S021PSim,total,Brasil,2019,S021P,0.72256554,0.69506341,0.7500677,0.019419631
S021PSim1,total,Capital,2019,S021P,0.75117212,0.70981244,0.7925318,0.028092449
S022PSim,total,Brasil,2019,S022P,0.75411181,0.72742385,0.7807998,0.018056418
S022PSim1,total,Capital,2019,S022P,0.78080922,0.74082019,0.8207982,0.026130502
S023PSim,total,Brasil,2019,S023P,0.44394229,0.41276521,0.4751194,0.03583115
S023PSim1,total,Capital,2019,S023P,0.44329869,0.39325467,0.4933427,0.057598032
S024PSim,total,Brasil,2019,S024P,0.85014508,0.82818447,0.8721057,0.013179631
S024PSim1,total,Capital,2019,S024P,0.8900731,0.85453745,0.9256088,0.020369976


#### Unindo tabela de indicadores e de totais

In [22]:
# Stack the per-domain indicator rows on top of the totals rows
matriz_final <-rbind(matriz_indicadores,matriz_totais)

#### Visualizando tabela de indicadores

In [23]:
# Display the combined table of indicators and totals
matriz_final

Unnamed: 0_level_0,abr_tipo,abr_nome,Ano,Indicador,Sim,LowerS,UpperS,cvS
Unnamed: 0_level_1,<chr>,<fct>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
Branca,raça,Branca,2019,S020P,0.9462652,0.9150585,0.9774720,0.016826270
Preta,raça,Preta,2019,S020P,0.9491546,0.9190134,0.9792958,0.016202236
Parda,raça,Parda,2019,S020P,0.9406377,0.9204789,0.9607964,0.010934363
Até 1/2 SM,rend_per_capita,Até 1/2 SM,2019,S020P,0.9296724,0.9109426,0.9484022,0.010279112
1/2 até 1 SM,rend_per_capita,1/2 até 1 SM,2019,S020P,0.9413991,0.9069147,0.9758836,0.018689675
1 até 2 SM,rend_per_capita,1 até 2 SM,2019,S020P,0.9616164,0.9246481,0.9985846,0.019614569
2 até 3 SM,rend_per_capita,2 até 3 SM,2019,S020P,0.9887464,0.9716482,1.0058446,0.008823016
Mais de 3 SM,rend_per_capita,Mais de 3 SM,2019,S020P,0.9642104,0.9161889,1.0122318,0.025410618
18 a 24 anos,fx_idade_s,18 a 24 anos,2019,S020P,0.9495413,0.9244755,0.9746072,0.013468538
25 a 29 anos,fx_idade_s,25 a 29 anos,2019,S020P,0.9529889,0.9344372,0.9715406,0.009932271


#### Exportando tabela de indicadores calculados - Módulo S  2019

In [24]:
# Export the combined indicator table (domains + totals) as a ";"-separated
# file with "," as the decimal mark.
# Replace the placeholder with the output directory.
diretorio_saida <- "<coloque aqui o diretório>"
# `matriz_final` is the table built in this notebook (the original referenced
# `matrizIndicadores`, which is never defined here). file.path() inserts the
# path separator that paste0() omitted.
write.table(
  matriz_final,
  file = file.path(diretorio_saida, "Indicadores_2019S_R.csv"),
  sep = ";",
  dec = ",",
  row.names = FALSE
)