from sqlalchemy import create_engine
#engine 생성
#password 특수문자 포함 시 password에 url 인코딩 필요
engine = create_engine("postgresql://{username}{password}@{host}/{database}")
#password 인코딩 예시
import urllib.parse
print(urllib.parse.quote_plus("kx@jj5/g")) #'kx%40jj5%2Fg' 출력
from sqlalchemy import URL
#URL을 이용한 engine생성
#password에 특수문자가 포함 시 인코딩 하지 않고 사용할 수 있음.
url_object = URL.create(
drivername="",
username={username},
password={password},
host={host},
database={database},
query={"charset":"cp949"}
)
engine = create_engine(url_object)
위와 같이 생성한다.
* 각 DB별 engine 생성 샘플
# mysql default
engine = create_engine("mysql://scott:tiger@localhost/foo")
# mysqlclient (a maintained fork of MySQL-Python)
engine = create_engine("mysql+mysqldb://scott:tiger@localhost/foo")
# PyMySQL
engine = create_engine("mysql+pymysql://scott:tiger@localhost/foo")
#Oracle
engine = create_engine("oracle://scott:tiger@127.0.0.1:1521/sidname")
engine = create_engine("oracle+cx_oracle://scott:tiger@tnsname")
# mssql pyodbc
engine = create_engine("mssql+pyodbc://scott:tiger@mydsn")
# pymssql
engine = create_engine("mssql+pymssql://scott:tiger@hostname:port/dbname")
# sqlite://<nohostname>/<path>
# where <path> is relative:
engine = create_engine("sqlite:///foo.db")
# Unix/Mac - 4 initial slashes in total
engine = create_engine("sqlite:////absolute/path/to/foo.db")
# Windows
engine = create_engine("sqlite:///C:\\path\\to\\foo.db")
# Windows alternative using raw string
engine = create_engine(r"sqlite:///C:\path\to\foo.db")
You are given a table,Projects, containing three columns:Task_ID,Start_DateandEnd_Date. It is guaranteed that the difference between theEnd_Dateand theStart_Dateis equal to1day for each row in the table.
If theEnd_Dateof the tasks are consecutive, then they are part of the same project. Samantha is interested in finding the total number of different projects completed.
Write a query to output the start and end dates of projects listed by the number of days it took to complete the project in ascending order. If there is more than one project that have the same number of completion days, then order by the start date of the project.
Project 1: Tasks1,2and3are completed on consecutive days, so these are part of the project. Thus start date of project is2015-10-01and end date is2015-10-04, so it took3 daysto complete the project.
Project 2: Tasks4 and 5 are completed on consecutive days, so these are part of the project. Thus, the start date of project is 2015-10-13 and end date is 2015-10-15, so it took 2 days to complete the project.
Project 3: Only task6 is part of the project. Thus, the start date of project is 2015-10-28 and end date is 2015-10-29, so it took 1 day to complete the project.
Project 4: Only task 7 is part of the project. Thus, the start date of project is 2015-10-30 and end date is 2015-10-31, so it took 1 day to complete the project.
Answer :
select *
from(
select MIN(val) as startdate, MAX(val) as enddate
from(
(select ROW_NUMBER() over(order by (select End_Date)) as ID, End_Date as val
from Projects
where End_Date not in (select Start_Date
from Projects)
union all
select ROW_NUMBER() over(order by (select Start_Date)) as ID, Start_Date as val
from Projects
where Start_Date not in (select End_Date
from Projects))
) Y
group by ID
) Z
order by datediff(dd,startdate,enddate), startdate
--단축키 쿼리
/*
도구 > 옵션 > 환경 > 키보드 > 쿼리 바로 가기
*/
--ctrl+3 : stored procedure, function 내용 보기
sp_helptext test_procedure
--ctrl+4 : 해당 문자열을 포함하는 프로시저 찾기
SELECT DISTINCT A.NAME FROM dbo.SYSOBJECTS AS A INNER JOIN dbo.SYSCOMMENTS AS B ON A.ID = B.ID WHERE A.TYPE = 'P' AND B.TEXT LIKE
--ctrl+5 : 해당 컬럼명을 사용하는 테이블 찾기
SELECT T.name AS table_name, C.name AS column_name FROM sys.tables AS T INNER JOIN sys.columns AS C ON T.object_id = C.object_id WHERE C.name =
머신러닝 또는 딥러닝 시 고려해야 할 사항 중 데이터 전처리 내 Feature 선택에 대한 방법들
Feature란?
모델 학습 시 사용되는 입력 값. 즉 일반적으로 말하는 머신러닝 또는 딥러닝에서 사용되는 '독립변수들'이라고 보면 된다.
*이 글에서는 Feature라는 단어 대신 독립변수라는 단어를 사용.
학습시 어떤 독립변수를 사용할 지에 따라 학습 시간, 성능에 많은 영향을 끼친다.
크게 독립변수들은 숫자형과 문자형 두가지로 분류한다.
python3 pandas dataframe 예시 코드
import pandas as pd
sample = pd.read_csv(...) #샘플 데이터 파일을 읽는다.
print(sample.dtypes[sample.dtypes == "object"].index) #문자형 독립변수들
print(sample.dtypes[sample.dtypes != "object"].index) #숫자형 독립변수들
print(sample.info()) # 각 컬럼의 데이터 타입을 알 수 있다